Commit e26527ac authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge dev-phylo

parents a2fd553a 3e6c4d4a
......@@ -17,11 +17,10 @@ Adaptative Phylo binaries
module Main where
import Data.Aeson
import Data.ByteString.Lazy (ByteString)
import Data.Maybe (isJust, fromJust)
import Data.List (concat, nub, isSuffixOf, take)
import Data.List (concat, nub, isSuffixOf)
import Data.String (String)
import Data.Text (Text, unwords, unpack)
import Data.Text (Text, unwords, unpack, replace)
import Crypto.Hash.SHA256 (hash)
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
......@@ -31,20 +30,25 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicDistance')
import GHC.IO (FilePath)
import Prelude (Either(..))
import Prelude (Either(Left, Right))
import System.Environment
import System.Directory (listDirectory)
import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
......@@ -66,7 +70,7 @@ getFilesFromPath path = do
-- | To read and decode a Json file
readJson :: FilePath -> IO ByteString
readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
......@@ -125,6 +129,92 @@ fileToDocs parser path lst = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
sensToLabel :: Config -> [Char]
sensToLabel config = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: Config -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: Config -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
qualToConfig :: Config -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
--------------
-- | Main | --
--------------
......@@ -148,9 +238,45 @@ main = do
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo"
let phylo = toPhylo corpus mapList config
printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep
-- let phylo = toPhylo (setConfig config phyloStep)
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
writePhylo phyloWithLinksFile phyloWithLinks
-- | probes
......@@ -162,33 +288,8 @@ main = do
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport phylo
let clq = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s -> "clique_" <> (show s)
let sensibility = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "-" <> clq
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot"
let dot = toPhyloExport (setConfig config phyloWithLinks)
let output = configToLabel config
dotToFile output dot
......@@ -89,17 +89,11 @@ library:
- Gargantext.Core.Text.Flow
- Gargantext.Core.Viz.Graph
- Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.Phylo
- Gargantext.Core.Viz.AdaptativePhylo
- Gargantext.Core.Viz.Phylo.PhyloMaker
- Gargantext.Core.Viz.Phylo.Tools
- Gargantext.Core.Viz.Phylo.PhyloTools
- Gargantext.Core.Viz.Phylo.PhyloExport
- Gargantext.Core.Viz.Phylo.SynchronicClustering
- Gargantext.Core.Viz.Phylo.Example
- Gargantext.Core.Viz.Phylo.LevelMaker
- Gargantext.Core.Viz.Phylo.View.Export
- Gargantext.Core.Viz.Phylo.View.ViewMaker
- Gargantext.Core.Viz.Types
dependencies:
- HSvm
......@@ -135,6 +129,7 @@ library:
- conduit-extra
- containers
- contravariant
- cryptohash
- crawlerHAL
- crawlerISTEX
- crawlerIsidore
......@@ -312,31 +307,6 @@ executables:
- unordered-containers
- full-text-search
gargantext-phylo:
main: Main.hs
source-dirs: bin/gargantext-phylo
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- aeson
- async
- base
- bytestring
- containers
- directory
- gargantext
- vector
- parallel
- cassava
- ini
- optparse-generic
- split
- unordered-containers
gargantext-adaptative-phylo:
main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo
......@@ -360,7 +330,8 @@ executables:
- ini
- optparse-generic
- split
- unordered-containers
- unordered-containers
- cryptohash
gargantext-import:
main: Main.hs
......
......@@ -66,7 +66,7 @@ import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Tree (tree, TreeMode(..))
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI (PhyloAPI, phyloAPI)
import qualified Gargantext.API.Node.Share as Share
import qualified Gargantext.API.Node.Update as Update
import qualified Gargantext.API.Search as Search
......
......@@ -61,17 +61,18 @@ import Data.Set (Set)
import Data.Set (fromList, toList, isSubsetOf)
import Data.Graph.Inductive hiding (Graph, neighbors, subgraph, (&))
import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph', Threshold)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.AdaptativePhylo
-- import Debug.Trace (trace)
type Graph = Graph_Undirected
type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
getMaxCliques :: Ord a => Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques d t m = map fromIndices $ getMaxCliques' t m'
getMaxCliques :: Ord a => CliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where
m' = toIndex to m
(to,from) = createIndices m
......@@ -81,7 +82,10 @@ getMaxCliques d t m = map fromIndices $ getMaxCliques' t m'
getMaxCliques' t' n = maxCliques graph
where
graph = mkGraphUfromEdges (Map.keys n')
n' = cooc2graph' d t' n
-- n' = cooc2graph' d t' n
n' = case f of ByThreshold -> cooc2graph' d t' n
ByNeighbours -> cooc2graph'' d t' n
maxCliques :: Graph -> [[Node]]
maxCliques g = map (\n -> subMaxCliques g (n:ns)) ns & concat & takeMax
......
......@@ -72,6 +72,15 @@ data Proximity =
-- , _wlj_elevation :: Double
-}
}
| WeightedLogSim
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| Hamming
deriving (Show,Generic,Eq)
......@@ -99,13 +108,16 @@ data TimeUnit =
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _mcl_size :: Int }
{ _mcl_size :: Int
, _mcl_threshold :: Double
, _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq)
......@@ -124,6 +136,7 @@ data Config =
, phyloLevel :: Int
, phyloProximity :: Proximity
, seaElevation :: SeaElevation
, findAncestors :: Bool
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
......@@ -144,10 +157,11 @@ defaultConfig =
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, seaElevation = Constante 0.1 0.1
, phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 100 1
, findAncestors = True
, phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0 1
, timeUnit = Year 3 1 5
, clique = MaxClique 0
, clique = MaxClique 0 3 ByNeighbours
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
......@@ -163,6 +177,8 @@ instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON CliqueFilter
instance ToJSON CliqueFilter
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
......@@ -423,5 +439,17 @@ makeLenses ''PhyloBranch
-- | JSON instances | --
------------------------
instance FromJSON Phylo
instance ToJSON Phylo
instance FromJSON PhyloParam
instance ToJSON PhyloParam
instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel
instance ToJSON PhyloLevel
instance FromJSON Software
instance ToJSON Software
instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
......@@ -10,8 +10,7 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Graph
where
......
......@@ -71,6 +71,37 @@ cooc2graphWith :: PartitionMethod
cooc2graphWith Louvain = cooc2graphWith' (cLouvain "1")
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
cooc2graph'' :: Ord t => Distance
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
cooc2graph'' distance threshold myCooc = neighbouMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat Triangular 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
neighbouMap = filterByNeighbours threshold
$ mat2map distanceMat
-- Quentin
filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
filterByNeighbours threshold distanceMap = filteredMap
where
indexes :: [Index]
indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
filteredMap :: Map (Index, Index) Double
filteredMap = Map.fromList
$ List.concat
$ map (\idx ->
let selected = List.reverse
$ List.sortOn snd
$ Map.toList
$ Map.filter (> 0)
$ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
in List.take (round threshold) selected
) indexes
cooc2graphWith' :: ToComId a
=> Partitions a
......@@ -129,6 +160,49 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
myCooc' bridgeness' confluence' partitions
-- cooc2graph :: Distance
-- -> Threshold
-- -> (Map (Text, Text) Int)
-- -> IO Graph
-- cooc2graph distance threshold myCooc = do
-- printDebug "cooc2graph" distance
-- let
-- -- TODO remove below
-- theMatrix = Map.fromList $ HashMap.toList myCooc
-- (ti, _) = createIndices theMatrix
-- myCooc' = toIndex ti theMatrix
-- matCooc = map2mat 0 (Map.size ti)
-- $ Map.filterWithKey (\(a,b) _ -> a /= b)
-- $ Map.filter (> 1) myCooc'
-- distanceMat = measure distance matCooc
-- distanceMap = Map.filter (> threshold) $ mat2map distanceMat
-- nodesApprox :: Int
-- nodesApprox = n'
-- where
-- (as, bs) = List.unzip $ Map.keys distanceMap
-- n' = Set.size $ Set.fromList $ as <> bs
-- ClustersParams rivers _level = clustersParams nodesApprox
-- printDebug "Start" ("partitions" :: Text)
-- partitions <- if (Map.size distanceMap > 0)
-- -- then iLouvainMap 100 10 distanceMap
-- -- then hLouvain distanceMap
-- then doPartitions distanceMap
-- else panic "Text.Flow: DistanceMap is empty"
-- printDebug "End" ("partitions" :: Text)
-- let
-- -- bridgeness' = distanceMap
-- bridgeness' = trace ("Rivers: " <> show rivers)
-- $ bridgeness rivers partitions distanceMap
-- confluence' = confluence (Map.keys bridgeness') 3 True False
-- pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
-- myCooc' bridgeness' confluence' partitions
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double
......
......@@ -25,7 +25,7 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo where
module Gargantext.Core.Viz.LegacyPhylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
......@@ -261,11 +261,12 @@ data LouvainParams = LouvainParams
-- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams
| WeightedLogSim WLJParams
| Hamming HammingParams
| Filiation
deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard proximity
-- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
......
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Aggregates
where
import Control.Parallel.Strategies
import Gargantext.Prelude hiding (elem)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
---------------------
-- | Foundations | --
---------------------
-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | Documents | --
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
-----------------
-- | Periods | --
-----------------
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "initPeriods" l, last' "initPeriods" l))
$ chunkAlong g s [start .. end]
--------------
-- | Cooc | --
--------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToFullCombi $ fst x) l
where
--------------------------------------
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = fromList $ map (\x -> (x,0)) $ listToFullCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-------------
-- | Fis | --
-------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
-- | Tracers | --
-----------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
<> "support : " <> show (countSup 1 supps) <> " (>1) "
<> show (countSup 2 supps) <> " (>2) "
<> show (countSup 3 supps) <> " (>3) "
<> show (countSup 4 supps) <> " (>4) "
<> show (countSup 5 supps) <> " (>5) "
<> show (countSup 6 supps) <> " (>6)\n"
<> "clique size : " <> show (countSup 1 ngrms) <> " (>1) "
<> show (countSup 2 ngrms) <> " (>2) "
<> show (countSup 3 ngrms) <> " (>3) "
<> show (countSup 4 ngrms) <> " (>4) "
<> show (countSup 5 ngrms) <> " (>5) "
<> show (countSup 6 ngrms) <> " (>6)\n"
) m
where
--------------------------------------
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
--------------------------------------
supps :: [Double]
supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
--------------------------------------
ngrms :: [Double]
ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),sortOn,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | Try to connect a focused branch to other candidate branches by finding the best pointers
reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where
branches :: [[PhyloGroup]]
branches = elems
$ fromListWith (++)
$ foldl' (\mem g -> case getGroupBranchId g of
Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = branches ! (getGroupId g)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
branches :: Map PhyloGroupId Int
branches = graphToBranches (getGroupsWithLevel lvl p)
--------------------------------------
-- trace' bs = trace bs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo.Cluster
where
import Control.Parallel.Strategies
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
import Data.Map (Map, fromList, mapKeys)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
--------------
-- | Algo | --
--------------
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
idx e = case elemIndex e nodes of
Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
Just i -> i
--------------------------------------
-----------------------
-- | Cluster Maker | --
-----------------------
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ filter (\(g,g') -> g /= g')
$ listToDirectedCombi gs
-- | To transform a Graph into Clusters
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
filterGraph prox (ns,es) = case prox of
WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
_ -> undefined
-- | To clusterise a Phylo
phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl clus p = Map.fromList
$ zip periods
$ map (\g -> if null (fst g)
then []
else graphToClusters clus g) graphs'
where
--------------------------------------
graphs' :: [([GroupNode],[GroupEdge])]
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
--------------------------------------
periods :: [PhyloPeriodId]
periods = getPhyloPeriods p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " edges\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
{-|
Module : Gargantext.Core.Viz.Phylo.Example
Description : Phylomemy example based on history of Cleopatre.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- | Cesar et Cleôpatre
-- Exemple de phylomemie
-- French without accents
TODO:
- split the functions : RAW -> Document -> Ngrams
-- reverse history: antechronologique
-- metrics support
-}
module Gargantext.Core.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Control.Lens hiding (both, Level)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.LinkMaker
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.ViewMaker
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath)
import qualified Data.List as List
------------------------------------------------------
-- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView
phyloView :: PhyloView
phyloView = toPhyloView (queryParser' queryViewEx) phyloFromQuery
-- | To do : create an other request handler and an other query parser
queryParser' :: [Char] -> PhyloQueryView
queryParser' _q = phyloQueryView
queryViewEx :: [Char]
queryViewEx = "level=3"
++ "&childs=false"
++ "&filter=LonelyBranchFilter"
++ "&metric=BranchAge"
++ "&tagger=BranchPeakFreq"
++ "&tagger=GroupLabelCooc"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge,BranchBirth,BranchGroups] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
--------------------------------------------------
-- | STEP 11 | -- Create a Phylo from a user Query
--------------------------------------------------
phyloFromQuery :: Phylo
phyloFromQuery = toPhylo phyloQueryBuild docs termList empty
-- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild
queryParser _q = phyloQueryBuild
queryEx :: [Char]
queryEx = "title=Cesar et Cleôpatre"
++ "&desc=An example of Phylomemy (french without accent)"
++ "grain=5&steps=3"
++ "cluster=FrequentItemSet"
++ "interTemporalMatching=WeightedLogJaccard"
++ "nthLevel=2"
++ "nthCluster=RelatedComponents"
++ "nthProximity=Filiation"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.9 10) 5 0.8 0.5 4 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
----------------------------------------------------------------------------------------------------------------------------
-- | STEP 10 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
----------------------------------------------------------------------------------------------------------------------------
phylo6 :: Phylo
phylo6 = toNthLevel 6 defaultWeightedLogJaccard (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phylo3
phylo3 :: Phylo
phylo3 = setPhyloBranches 3
$ interTempoMatching Descendant 3 defaultWeightedLogJaccard
$ interTempoMatching Ascendant 3 defaultWeightedLogJaccard
$ setLevelLinks (2,3)
$ addPhyloLevel 3
(phyloToClusters 2 (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phyloBranch2)
phyloBranch2
--------------------------------
-- | STEP 9 | -- Cluster the Fis
--------------------------------
phyloBranch2 :: Phylo
phyloBranch2 = setPhyloBranches 2 phylo2_c
phylo2_c :: Phylo
phylo2_c = interTempoMatching Descendant 2 defaultWeightedLogJaccard phylo2_p
phylo2_p :: Phylo
phylo2_p = interTempoMatching Ascendant 2 defaultWeightedLogJaccard phylo2_1_2
phylo2_1_2 :: Phylo
phylo2_1_2 = setLevelLinks (1,2) phylo2
-- | phylo2 allready contains the LevelChilds links from 2 to 1
phylo2 :: Phylo
phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
-- | STEP 8 | -- Find the Branches
----------------------------------
phyloBranch1 :: Phylo
phyloBranch1 = setPhyloBranches 1 phylo1_c
--------------------------------------------------------------------
-- | STEP 7 | -- Link the PhyloGroups of level 1 through the Periods
--------------------------------------------------------------------
phylo1_c :: Phylo
phylo1_c = interTempoMatching Descendant 1 defaultWeightedLogJaccard phylo1_p
phylo1_p :: Phylo
phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
-----------------------------------------------
-- | STEP 6 | -- Build the level 1 of the Phylo
-----------------------------------------------
phylo1_0_1 :: Phylo
phylo1_0_1 = setLevelLinks (0,1) phylo1
-- phylo1_1_0 :: Phylo
-- phylo1_1_0 = setLevelLinks (1,0) phylo1
phylo1 :: Phylo
phylo1 = addPhyloLevel (1) phyloFis phylo'
-------------------------------------------------------------------
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phylo' :: Phylo
phylo' = phylo & phylo_fis .~ phyloFis
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (docsToFis phyloDocs phylo) True 1 1
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
----------------------------------------
phylo :: Phylo
phylo = addPhyloLevel 0 phyloDocs phyloBase
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) docs
------------------------------------------------------------------------
-- | STEP 1 | -- Init the Base of the Phylo from Periods and Foundations
------------------------------------------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase phyloQueryBuild phyloParam docs termList empty
phyloParam :: PhyloParam
phyloParam = (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just phyloQueryBuild))
docs :: [Document]
docs = parseDocs foundationsRoots corpus
foundationsRoots :: Vector Ngrams
foundationsRoots = initFoundationsRoots (termListToNgrams termList)
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
termList :: TermList
termList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
, "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
corpus :: [(Date, Text)]
corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
......@@ -14,11 +14,11 @@ Portability : POSIX
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Phylo.API
module Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
-- import Data.Maybe (fromMaybe)
-- import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
......@@ -33,12 +33,12 @@ import Web.HttpApiData (readTextData)
import Gargantext.API.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Database.Query.Table.Node (insertNodes, node)
-- import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Main
import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Viz.LegacyPhylo
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
-- import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Types (TODO(..))
------------------------------------------------------------------------
......@@ -96,17 +96,20 @@ type GetPhylo = QueryParam "listId" ListId
-- Add real text processing
-- Fix Filter parameters
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
getPhylo _ _lId _ _ = undefined
-- getPhylo phId _lId l msb = do
-- phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
-- let
-- level = fromMaybe 2 l
-- branc = fromMaybe 2 msb
-- maybePhylo = phNode ^. (node_hyperdata . hp_data)
-- p <- liftBase $ viewPhylo2Svg
-- $ viewPhylo level branc
-- $ fromMaybe phyloFromQuery maybePhylo
-- pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
......@@ -137,9 +140,9 @@ putPhylo = undefined
-- | Instances
instance Arbitrary Phylo where arbitrary = elements [phylo]
-- instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
-- instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
......
......@@ -11,35 +11,37 @@ Portability : POSIX
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Main
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import Data.GraphViz
-- import Data.GraphViz
-- import qualified Data.ByteString as DB
import qualified Data.List as List
import Data.Maybe
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Prelude
import Gargantext.Database.Action.Flow
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Core.Types
-- import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
-- import Gargantext.Core.Viz.Phylo.Tools
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
type MinSizeBranch = Int
......@@ -91,31 +93,40 @@ flowPhylo' corpus terms l m fp = do
defaultQuery :: PhyloQueryBuild
defaultQuery = defaultQueryBuild'
"Default Title"
"Default Description"
defaultQuery = undefined
-- defaultQuery = defaultQueryBuild'
-- "Default Title"
-- "Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
buildPhylo' _ _ _ = undefined
-- buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
-- refactor 2021
-- queryView :: Level -> MinSizeBranch -> PhyloQueryView
-- queryView level _minSizeBranch = PhyloQueryView level Merge False 2
-- [BranchAge]
-- []
-- -- [SizeBranch $ SBParams minSizeBranch]
-- [BranchPeakFreq,GroupLabelCooc]
-- (Just (ByBranchAge,Asc))
-- Json Flat True
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView level _minSizeBranch = PhyloQueryView level Merge False 2
[BranchAge]
[]
-- [SizeBranch $ SBParams minSizeBranch]
[BranchPeakFreq,GroupLabelCooc]
(Just (ByBranchAge,Asc))
Json Flat True
queryView _level _minSizeBranch = undefined
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo l b phylo = toPhyloView (queryView l b) phylo
viewPhylo _l _b _phylo = undefined
-- viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
writePhylo _fp _phview = undefined
-- writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
viewPhylo2Svg :: PhyloView -> IO DB.ByteString
viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeSynonymInstances #-}
module Gargantext.Core.Viz.Phylo.LevelMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, last, null)
import Data.Map (Map, (!), empty, singleton, size)
import Data.Text (Text)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.LinkMaker
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
import qualified Data.Set as Set
import qualified Data.Vector as Vector
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-------------------------
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
-- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
-- | To create a list of PhyloGroups based on a list of aggregates a
toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
instance PhyloLevelMaker PhyloCluster
where
--------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl > 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p =
let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
clusters' = clusters `using` parList rdeepseq
in clusters'
--------------------------------------
instance PhyloLevelMaker PhyloFis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
instance PhyloLevelMaker Document
where
--------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 0 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
$ (nub . concat)
$ map text l
--------------------------------------
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over phylo_periodLevels
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups)
<> " groups for "
<> show (pId) )
$ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period
) p
----------------------
-- | toPhyloGroup | --
----------------------
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloFis
-> Map Date (Map (Int,Int) Double)
-> Vector Ngrams
-> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
(singleton "support" (fromIntegral $ getSupport fis))
Nothing
cooc
[] [] [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInRoots' x root)
$ Set.toList
$ getClique fis
--------------------------------------
childs :: [Pointer]
childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
--------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloCluster
-> Map (Date,Date) [PhyloCluster]
-> Phylo
-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
cooc
ascLink desLink [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams)
(periodsToYears [prd] )
(getPhyloCooc p )
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
----------------------
-- | toPhyloLevel | --
----------------------
-- | To reconstruct the Phylo from a set of Document to a given Level
toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
--------------------------------------
-- phylo0 :: Phylo
-- phylo0 = tracePhyloN 0
-- $ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q init c termList fis
where
init = initPhyloParam (Just defaultPhyloVersion)
(Just defaultSoftware )
(Just q )
---------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) (clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p
--------------------------------------
lvl :: Level
lvl = getLastLevel p
--------------------------------------
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
-- \$ reLinkPhyloBranches 1
-- \$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
-- \$ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where
--------------------------------------
phyloFis :: Phylo
phyloFis = if (null $ getPhyloFis p)
then p & phylo_fis .~ refineFis (docsToFis d p) k s t
else p & phylo_fis .~ docsToFis d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To create the base of the Phylo (foundations, periods, cooc, etc)
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
--------------------------------------
-----------------
-- | Tracers | --
-----------------
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Filiation -> Phylo -> Phylo
traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from "
<> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
<> " to "
<> show (getPhyloPeriodId $ last $ _phylo_periods p)
<> "\n"
<> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length pts) <> " pointers\n") p
where
--------------------------------------
pts :: [Pointer]
pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.LinkMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, concat, delete, intersect, elemIndex, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra
import Data.Map (Map, (!), fromListWith, elems, restrictKeys, filterWithKey, keys, unionWith, unions, intersectionWith, member, fromList)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-----------------------------
-- | From Level to level | --
-----------------------------
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
where
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if (elem (getGroupId current) (getGroupLevelChildsId target))
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
$ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' limit id l = case to' of
Descendant -> take limit $ (tail . snd) next
Ascendant -> take limit $ (reverse . fst) next
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
where
--------------------------------------
next :: ([PhyloPeriodId], [PhyloPeriodId])
next = splitAt idx l
--------------------------------------
idx :: Int
idx = case (List.elemIndex id l) of
Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
Just i -> i
--------------------------------------
-- | To get the number of docs produced during a list of periods
periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
periodsToNbDocs prds phylo = sum $ elems
$ restrictKeys (phylo ^. phylo_docsByYears)
$ periodsToYears prds
-- | To process a given Proximity
processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
Hamming (HammingParams _) -> hamming cooc cooc'
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity :: Double -> Proximity -> Bool
filterProximity score prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
|| ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
$ listToPairs
$ filter (\g' -> (elem (getGroupPeriod g') prds)
&& ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
&& (((last' "makePairs" prds) == (getGroupPeriod g))
||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
$ getGroupsWithLevel (getGroupLevel g) p
matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
matchWithPairs g1 (g2,g3) p =
let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
cooc = if (g2 == g3)
then getGroupCooc g2
else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams = if (g2 == g3)
then getGroupNgrams g2
else union (getGroupNgrams g2) (getGroupNgrams g3)
in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of
Nothing -> []
Just pts -> head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame ->
let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
$ concat
$ map (\(t,t') ->
let proxi = matchWithPairs g (t,t') p
in
if (t == t')
then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers' fil pts g = g & case fil of
Descendant -> phylo_groupPeriodChilds %~ (++ pts)
Ascendant -> phylo_groupPeriodParents %~ (++ pts)
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
-- | To update a list of phyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
-- | a init avec la [[head groups]] et la tail groups
toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
toBranches mem gs
| null gs = mem
| otherwise = toBranches mem' $ tail gs
where
--------------------------------------
mem' :: [[PhyloGroup]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[PhyloGroup]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat $ map getGroupNgrams gs')
(getGroupNgrams $ (head' "toBranches" gs))
) mem
--------------------------------------
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
where
--------------------------------------
pointers :: [(PhyloGroupId,[Pointer])]
pointers =
let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
in (getGroupId g, phyloGroupMatching periods g p)) groups
pts' = pts `using` parList rdeepseq
in pts'
--------------------------------------
groups :: [PhyloGroup]
groups = getGroupsWithLevel lvl p
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l
groupsToMaps :: Ord b => (PhyloGroup -> b) -> [PhyloGroup] -> [Map PhyloGroupId PhyloGroup]
groupsToMaps f gs = map (\gs' -> fromList $ listToTuple getGroupId gs')
$ groupBy ((==) `on` f)
$ sortOn f gs
phyloToPeriodMaps :: Level -> Filiation -> Phylo -> [Map PhyloGroupId PhyloGroup]
phyloToPeriodMaps lvl fil p =
let prdMap = groupsToMaps (fst . getGroupPeriod) (getGroupsWithLevel lvl p)
in case fil of
Ascendant -> reverse prdMap
Descendant -> prdMap
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.phyloToPeriodMaps] Wrong type of filiation")
trackPointersRec :: Filiation -> Map PhyloGroupId PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
trackPointersRec fil m gs res =
if (null gs) then res
else if (Map.null m) then res ++ gs
else
let g = head' "track" gs
pts = Map.fromList $ getGroupPointers PeriodEdge fil g
pts' = Map.toList $ fromListWith (\w w' -> max w w') $ concat $ elems
$ intersectionWith (\w g' -> map (\(id,_w') -> (id, w))
$ getGroupPointers LevelEdge Ascendant g') pts m
res' = res ++ [case fil of
Ascendant -> g & phylo_groupPeriodParents .~ pts'
Descendant -> g & phylo_groupPeriodChilds .~ pts'
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.transposeLinks] Wrong type of filiation")]
in trackPointersRec fil (filterWithKey (\k _ -> not $ elem k (keys pts)) m) (tail' "track" gs) res'
transposeLinks :: Level -> Filiation -> Phylo -> Phylo
transposeLinks lvl fil p =
let prdMap = zip (phyloToPeriodMaps (lvl - 1) fil p) (phyloToPeriodMaps lvl fil p)
transposed = map (\(gs,gs') ->
let idx = fromJust $ elemIndex (gs,gs') prdMap
next = take (getPhyloMatchingFrame p) $ snd $ splitAt (idx + 1) prdMap
groups = trackPointersRec fil (unions $ map fst next) (elems gs') []
in (getGroupPeriod $ head' "transpose" groups ,groups)
) prdMap
transposed' = Map.fromList $ (transposed `using` parList rdeepseq)
in alterPhyloGroups
(\gs -> if ((not . null) gs) && (lvl == (getGroupLevel $ head' "transpose" gs))
then transposed' ! (getGroupPeriod $ head' "transpose" gs)
else gs
) p
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then
let groups = map (\g -> let m = reduceGroups g lvlGroups
in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq
in groups'
else gs
) p
where
--------------------------------------
-- | find an other way to find the group from the id
trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
trackPointers m pts = Map.toList
$ fromListWith (\w w' -> max w w')
$ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
--------------------------------------
reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
reduceGroups g gs = Map.fromList
$ map (\g' -> (getGroupId g',g'))
$ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
--------------------------------------
lvlGroups :: [PhyloGroup]
lvlGroups = getGroupsWithLevel (lvl - 1) p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Metrics
where
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, union, intersect, null, (++), sort)
import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text)
-- import Debug.Trace (trace)
----------------
-- | Ngrams | --
----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams
getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ toList cooc
-------------------------
-- | Ngrams Dynamics | --
-------------------------
sharedWithParents :: Date -> PhyloBranchId -> Int -> PhyloView -> Bool
sharedWithParents inf bid n pv = elem n
$ foldl (\mem pn -> if ((bid == (fromJust $ (pn ^. pn_bid)))
&& (inf > (fst $ getNodePeriod pn)))
then nub $ mem ++ (pn ^. pn_idx)
else mem ) []
$ (pv ^. pv_nodes)
findDynamics :: Int -> PhyloView -> PhyloNode -> Map Int (Date,Date) -> Double
findDynamics n pv pn m =
let prd = getNodePeriod pn
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- emergence
then 2
else if ((fst prd) == (fst $ m ! n))
-- recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- decrease
then 1
else 3
processDynamics :: PhyloView -> PhyloView
processDynamics pv = alterPhyloNode (\pn ->
pn & pn_metrics %~ insert "dynamics" (map (\n -> findDynamics n pv pn ngramsDates) $ (pn ^. pn_idx) ) ) pv
where
--------------------------------------
ngramsDates :: Map Int (Date,Date)
ngramsDates = map (\ds -> let ds' = sort ds
in (head' "Dynamics" ds', last' "Dynamics" ds'))
$ fromListWith (++)
$ foldl (\mem pn -> mem ++ (map (\n -> (n, [fst $ getNodePeriod pn, snd $ getNodePeriod pn]))
$ (pn ^. pn_idx))) []
$ (pv ^. pv_nodes)
--------------------------------------
-------------------
-- | Proximity | --
-------------------
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
--------------------------------------
wInter :: [Double]
wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
--------------------------------------
wUnion :: [Double]
wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
where
--------------------------------------
inter :: Map (Int, Int) Double
inter = intersection f1 f2
--------------------------------------
......@@ -49,7 +49,7 @@ phyloDot = toPhyloExport phylo2
--------------------------------------------------
phylo2 :: Phylo
phylo2 = synchronicClustering phylo1
phylo2 = synchronicClustering $ toHorizon phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
......@@ -104,7 +104,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 }
, clique = MaxClique 0 15 ByNeighbours }
docs :: [Document]
......
......@@ -661,8 +661,8 @@ toPhyloExport phylo = exportToDot phylo
groups = traceExportGroups
$ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo
$ toHorizon phylo
$ tracePhyloInfo phylo
-- $ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......@@ -670,12 +670,10 @@ traceExportBranches branches = trace ("\n"
<> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ("\n"
<> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
) groups
tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo
......
......@@ -24,6 +24,8 @@ import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
......@@ -49,20 +51,22 @@ toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel conf) $
if (phyloLevel conf) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)]
toPhylo :: Phylo -> Phylo
toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel $ getConfig phyloStep) $
if (phyloLevel $ getConfig phyloStep) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel $ getConfig phyloStep)]
else phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
-- > AD to db here
phyloAncestors :: Phylo
phyloAncestors =
if (findAncestors $ getConfig phyloStep)
then toHorizon phylo1
else phylo1
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
phylo1 :: Phylo
phylo1 = toPhylo1 phyloStep
-- > AD to db here
--------------------------------------
......@@ -131,13 +135,21 @@ cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
[] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
Constante start gap -> constanteTemporalMatching start gap
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative steps -> adaptativeTemporalMatching steps
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
toPhylo1 :: Phylo -> Phylo
toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
Constante start gap -> constanteTemporalMatching start gap phyloStep
Adaptative steps -> adaptativeTemporalMatching steps phyloStep
-----------------------
-- | To Phylo Step | --
-----------------------
-- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative _ -> toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
......@@ -145,9 +157,10 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-- docs' = groupDocsByPeriod' date (getPeriodIds phyloBase) docs
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
--------------------------------------
---------------------------
-- | Frequent Item Set | --
......@@ -196,8 +209,8 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ filterClique True s (filterCliqueBySupport)
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
MaxClique s -> filterClique True s (filterCliqueBySize)
phyloClique
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
phyloClique
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
......@@ -209,13 +222,13 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ ->
MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......
......@@ -225,12 +225,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ -> 0
MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s -> s
MaxClique s _ _ -> s
--------------
......@@ -295,12 +295,14 @@ filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ -> local >= thr
WeightedLogSim _ -> local >= thr
Hamming -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
WeightedLogJaccard _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim"
Hamming -> "Hamming"
---------------
......@@ -344,6 +346,16 @@ getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
setConfig :: Config -> Phylo -> Phylo
setConfig config phylo = phylo
& phylo_param .~ (PhyloParam
((phylo ^. phylo_param) ^. phyloParam_version)
((phylo ^. phylo_param) ^. phyloParam_software)
config)
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
......@@ -464,7 +476,7 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "\n" <> "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
......@@ -484,6 +496,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s -> s
WeightedLogSim s -> s
Hamming -> undefined
----------------
......
......@@ -42,7 +42,7 @@ mergeGroups coocs id mapIds childs =
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
[]
(mergeAncestors $ concat $ map _phylo_groupAncestors childs)
where
--------------------
bId :: [Int]
......@@ -50,6 +50,9 @@ mergeGroups coocs id mapIds childs =
--------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
--------------------
mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
......@@ -132,8 +135,12 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges sens edges =
case prox of
WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' sens nbDocs diago
((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
......@@ -169,6 +176,17 @@ adjustClustering sync branches = case sync of
ByProximityDistribution _ _ -> branches
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
in map (\g ->
let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors
-- 2) level up the ancestors ids and filter the ones that will be merged
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
in g & phylo_groupAncestors .~ ancestors'
) groups
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
......@@ -182,7 +200,7 @@ synchronicClustering phylo =
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ concat newBranches'
in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String
......
......@@ -18,7 +18,7 @@ import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Prelude (floor)
import Prelude (floor,tan,pi)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
......@@ -27,6 +27,8 @@ import Text.Printf
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
-------------------
......@@ -41,12 +43,12 @@ jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . le
-- | Process the inverse sumLog
sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + s) / log (nb + s)))) 0 diago
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
-- | Process the sumLog
sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + s) / log (nb + s))) 0 diago
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
......@@ -71,19 +73,49 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
--------------------------------------
-- | Process the weighted similarity between clusters. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog' sens nbDocs diagoInter) / minimum [(sumInvLog' sens nbDocs diagoEgo),(sumInvLog' sens nbDocs diagoTarget)]
| otherwise = (sumLog' sens nbDocs diagoInter) / minimum [(sumLog' sens nbDocs diagoEgo),(sumLog' sens nbDocs diagoTarget)]
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ego_ngrams target_ngrams
--------------------------------------
ngramsUnion :: [Int]
ngramsUnion = union ego_ngrams target_ngrams
--------------------------------------
diagoInter :: [Double]
diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
--------------------------------------
diagoEgo :: [Double]
diagoEgo = elems $ restrictKeys diago (Set.fromList ego_ngrams)
--------------------------------------
diagoTarget :: [Double]
diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams)
--------------------------------------
-- | To process the proximity between a current group and a pair of targets group
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of targets group using the adapted Wang et al. Similarity
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of
WeightedLogJaccard sens ->
let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams
else union targetNgrams targetNgrams'
in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
WeightedLogSim sens ->
let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams
else union targetNgrams targetNgrams'
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming -> undefined
------------------------
-- | Local Matching | --
------------------------
......@@ -257,20 +289,24 @@ relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
accuracy :: Int -> [PhyloGroup] -> Double
accuracy x bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length bk))
accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
-- The accuracy of a branch relatively to a term x is computed only over the periods there exist some cluster mentionning x in the phylomemy
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
/ (fromIntegral $ length bk'))
where
bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
fScore :: Double -> Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore beta x bk bx =
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore lambda x periods bk bx =
let rec = recall x bk bx
acc = accuracy x bk
in ((1 + beta ** 2) * acc * rec)
/ (((beta ** 2) * rec + acc))
acc = accuracy x periods bk
in ((1 + lambda ** 2) * acc * rec)
/ (((lambda ** 2) * acc + rec))
wk :: [PhyloGroup] -> Double
......@@ -278,13 +314,14 @@ wk bk = fromIntegral $ length bk
toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality' beta freq branches =
toPhyloQuality' lambda freq branches =
if (null branches)
then 0
else sum
$ map (\i ->
let bks = relevantBranches i branches
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore beta i bk bks)) bks))
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks))
$ keys freq
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
......@@ -311,8 +348,10 @@ toAccuracy freq branches =
$ map (\x ->
let px = freq ! x
bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x bk)) bx))
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x periods bk)) bx))
$ keys freq
where
pys :: Double
......@@ -320,21 +359,26 @@ toAccuracy freq branches =
-- | here we do the average of all the local f_scores
toPhyloQuality :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality beta freq branches =
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches =
if (null branches)
then 0
else sum
$ map (\x ->
let px = freq ! x
bx = relevantBranches x branches
wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
-- let px = freq ! x
let bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
wks = sum $ map wk bx
-- in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
-- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx))
$ keys freq
where
pys :: Double
pys = sum (elems freq)
-- where
-- pys :: Double
-- pys = sum (elems freq)
-- 1 / nb de foundation
------------------------------------
-- | Constant Temporal Matching | --
......@@ -375,9 +419,9 @@ updateThr thr branches = map (\b -> map (\g ->
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if snd ego
then
......@@ -400,12 +444,12 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
quality :: Double
quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
......@@ -417,29 +461,29 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality beta frequency
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let quality = toPhyloQuality beta frequency (map fst branches)
let quality = toPhyloQuality fdt lambda frequency (map fst branches)
acc = toAccuracy frequency (map fst branches)
rec = toRecall frequency (map fst branches)
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(β) = " <> printf "%.5f" quality
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
<> " ξ = " <> printf "%.5f" acc
<> " ρ = " <> printf "%.5f" rec
<> " branches = " <> show(length branches) <> " ↴")
$ breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
$ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching proximity beta minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
......@@ -450,7 +494,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ seaLevelMatching (phyloProximity $ getConfig phylo)
$ seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
......@@ -461,7 +506,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
(reverse $ sortOn (length . fst) groups)
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
......@@ -529,11 +574,11 @@ toThreshold lvl proxiGroups =
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
......@@ -553,7 +598,7 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
......@@ -561,7 +606,7 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality beta frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
......@@ -573,21 +618,21 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality beta frequency
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
......@@ -596,7 +641,7 @@ adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ")
$ adaptativeSeaLevelMatching proxiConf (depth - 1) elevation groupsProxi' beta minBranch frequency' frame periods docs coocs branches'
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
......@@ -607,7 +652,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
......
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level, Empty)
import Data.List (intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
import Data.Vector (Vector,elemIndex)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
--------------
-- | Misc | --
--------------
-- | Define a default value
def :: a -> Maybe a -> a
def = fromMaybe
-- | Does a List of Sets contains at least one Set of an other List
doesAnySetContains :: Eq a => Set a -> [Set a] -> [Set a] -> Bool
doesAnySetContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l)
-- | Does a list of A contains an other list of A
doesContains :: Eq a => [a] -> [a] -> Bool
doesContains l l'
| null l' = True
| length l' > length l = False
| elem (head' "doesContains" l') l = doesContains l (tail l')
| otherwise = False
-- | Does a list of ordered A contains an other list of ordered A
doesContainsOrd :: Eq a => Ord a => [a] -> [a] -> Bool
doesContainsOrd l l'
| null l' = False
| last l < (head' "doesContainsOrd" l') = False
| (head' "doesContainsOrd" l') `elem` l = True
| otherwise = doesContainsOrd l (tail l')
-- | To filter nested Sets of a
filterNestedSets :: Eq a => Set a -> [Set a] -> [Set a] -> [Set a]
filterNestedSets h l l'
| null l = if doesAnySetContains h l l'
then l'
else h : l'
| doesAnySetContains h l l' = filterNestedSets (head' "filterNestedSets1" l) (tail l) l'
| otherwise = filterNestedSets (head' "filterNestedSets2" l) (tail l) (h : l')
-- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
getKeyPair (x,y) m = case findPair (x,y) m of
Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
Just i -> i
where
--------------------------------------
findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
findPair (x',y') m'
| member (x',y') m' = Just (x',y')
| member (y',x') m' = Just (y',x')
| otherwise = Nothing
--------------------------------------
-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l
else f thr l
-- | To get all combinations of a list
listToFullCombi :: Eq a => [a] -> [(a,a)]
listToFullCombi l = [(x,y) | x <- l, y <- l]
-- | To get all combinations of a list
listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
-- | To get the sequential combinations of an order list
listToSequentialCombi :: Eq a => [a] -> [(a,a)]
listToSequentialCombi l = nubBy (\x y -> fst x == fst y) $ listToUnDirectedCombi l
-- | To get all combinations of a list with no repetition
listToUnDirectedCombi :: [a] -> [(a,a)]
listToUnDirectedCombi l = [ (x,y) | (x:rest) <- tails l, y <- rest ]
-- | To get all combinations of a list with no repetition and apply a function to the resulting list of pairs
listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
then (y,x)
else (x,y) ) m1
---------------
-- | Phylo | --
---------------
-- | An analyzer ingests a Ngrams and generates a modified version of it
phyloAnalyzer :: Ngrams -> Ngrams
phyloAnalyzer n = toLower n
-- | To init the foundation roots of the Phylo as a Vector of Ngrams
initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> Map (Date,Date) [PhyloFis] -> PhyloParam -> Phylo
initPhyloBase pds fds nbDocs cooc fis prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc fis prm
-- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
initPhyloParam (def defaultPhyloVersion -> v) (def defaultSoftware -> s) (def defaultQueryBuild -> q) = PhyloParam v s q
-- | To get the last computed Level in a Phylo
getLastLevel :: Phylo -> Level
getLastLevel p = (last . sort)
$ map (snd . getPhyloLevelId)
$ view ( phylo_periods
. traverse
. phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrameTh :: Phylo -> Double
getPhyloMatchingFrameTh p = _q_interTemporalMatchingFrameTh $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchNth :: Phylo -> Int
getPhyloReBranchNth p = _q_reBranchNth $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
--------------------
-- | PhyloRoots | --
--------------------
-- | To get the foundations of a Phylo
getFoundations :: Phylo -> PhyloFoundations
getFoundations = _phylo_foundations
-- | To get the foundations roots of a Phylo
getFoundationsRoots :: Phylo -> Vector Ngrams
getFoundationsRoots p = (getFoundations p) ^. phylo_foundationsRoots
-- | To get the Index of a Ngrams in the foundationsRoots of a Phylo
getIdxInRoots :: Ngrams -> Phylo -> Int
getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInRoots' :: Ngrams -> Vector Ngrams -> Int
getIdxInRoots' n root = case (elemIndex n root) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInVector] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
-- | To alter a PhyloGroup matching a given Level
alterGroupWithLevel :: (PhyloGroup -> PhyloGroup) -> Level -> Phylo -> Phylo
alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
-- | To alter each list of PhyloGroups following a given function
alterPhyloGroups :: ([PhyloGroup] -> [PhyloGroup]) -> Phylo -> Phylo
alterPhyloGroups f p = over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
) f p
-- | To filter the PhyloGroup of a Phylo according to a function and a value
filterGroups :: Eq a => (PhyloGroup -> a) -> a -> [PhyloGroup] -> [PhyloGroup]
filterGroups f x l = filter (\g -> (f g) == x) l
-- | To maybe get the PhyloBranchId of a PhyloGroup
getGroupBranchId :: PhyloGroup -> Maybe PhyloBranchId
getGroupBranchId = _phylo_groupBranchId
-- | To get the PhyloGroups Childs of a PhyloGroup
getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupChilds g p = getGroupsFromIds (getGroupPeriodChildsId g) p
-- | To get the id of a PhyloGroup
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
-- | To get the level child pointers of a PhyloGroup
getGroupLevelChilds :: PhyloGroup -> [Pointer]
getGroupLevelChilds = _phylo_groupLevelChilds
-- | To get the PhyloGroups Level Childs Ids of a PhyloGroup
getGroupLevelChildsId :: PhyloGroup -> [PhyloGroupId]
getGroupLevelChildsId g = map fst $ getGroupLevelChilds g
-- | To get the level parent pointers of a PhyloGroup
getGroupLevelParents :: PhyloGroup -> [Pointer]
getGroupLevelParents = _phylo_groupLevelParents
-- | To get the PhyloGroups Level Parents Ids of a PhyloGroup
getGroupLevelParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupLevelParentsId g = map fst $ getGroupLevelParents g
-- | To get the PhyloGroups Level Parents Ids of a PhyloGroup
getGroupLevelParentId :: PhyloGroup -> PhyloGroupId
getGroupLevelParentId g = (head' "getGroupLevelParentId") $ getGroupLevelParentsId g
-- | To get the Meta value of a PhyloGroup
getGroupMeta :: Text -> PhyloGroup -> Double
getGroupMeta k g = (g ^. phylo_groupMeta) ! k
-- | To get the Ngrams of a PhyloGroup
getGroupNgrams :: PhyloGroup -> [Int]
getGroupNgrams = _phylo_groupNgrams
-- | To get the list of pairs (Childs & Parents) of a PhyloGroup
getGroupPairs :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupPairs g p = (getGroupChilds g p) ++ (getGroupParents g p)
-- | To get the PhyloGroups Parents of a PhyloGroup
getGroupParents :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupParents g p = getGroupsFromIds (getGroupPeriodParentsId g) p
-- | To get the period out of the id of a PhyloGroup
getGroupPeriod :: PhyloGroup -> (Date,Date)
getGroupPeriod = fst . fst . getGroupId
-- | To get the period child pointers of a PhyloGroup
getGroupPeriodChilds :: PhyloGroup -> [Pointer]
getGroupPeriodChilds = _phylo_groupPeriodChilds
-- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
getGroupPeriodChildsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodChildsId g = map fst $ getGroupPeriodChilds g
-- | To get the period parent pointers of a PhyloGroup
getGroupPeriodParents :: PhyloGroup -> [Pointer]
getGroupPeriodParents = _phylo_groupPeriodParents
-- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
-- | To get the pointers of a given Phylogroup
getGroupPointers :: EdgeType -> Filiation -> PhyloGroup -> [Pointer]
getGroupPointers t f g = case t of
PeriodEdge -> case f of
Ascendant -> getGroupPeriodParents g
Descendant -> getGroupPeriodChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
LevelEdge -> case f of
Ascendant -> getGroupLevelParents g
Descendant -> getGroupLevelChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
-- | To get the roots labels of a list of group ngrams
getGroupText :: PhyloGroup -> Phylo -> [Text]
getGroupText g p = ngramsToText (getFoundationsRoots p) (getGroupNgrams g)
-- | To get all the PhyloGroup of a Phylo
getGroups :: Phylo -> [PhyloGroup]
getGroups = view ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
)
-- | To get all PhyloGroups matching a list of PhyloGoupIds in a Phylo
-- getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
-- getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup
getGroupFromId id p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in groups ! id
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in elems $ restrictKeys groups (Set.fromList ids)
-- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
getGroupsFromNodes :: [PhyloNode] -> Phylo -> [PhyloGroup]
getGroupsFromNodes ns p = getGroupsFromIds (map getNodeId ns) p
-- | To get all the PhyloGroup of a Phylo with a given level and period
getGroupsWithFilters :: Int -> (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsWithFilters lvl prd p = (getGroupsWithLevel lvl p)
`intersect`
(getGroupsWithPeriod prd p)
-- | To get all the PhyloGroup of a Phylo with a given Level
getGroupsWithLevel :: Int -> Phylo -> [PhyloGroup]
getGroupsWithLevel lvl p = filterGroups getGroupLevel lvl (getGroups p)
-- | To get all the PhyloGroup of a Phylo with a given Period
getGroupsWithPeriod :: (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsWithPeriod prd p = filterGroups getGroupPeriod prd (getGroups p)
-- | To create a PhyloGroup in a Phylo out of a list of Ngrams and a set of parameters
initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx)
lbl
idxs
(Map.empty)
(Map.empty)
Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
---------------------
-- | PhyloPeriod | --
---------------------
-- | To alter each PhyloPeriod of a Phylo following a given function
alterPhyloPeriods :: (PhyloPeriod -> PhyloPeriod) -> Phylo -> Phylo
alterPhyloPeriods f p = over ( phylo_periods
. traverse) f p
-- | To append a list of PhyloPeriod to a Phylo
appendToPhyloPeriods :: [PhyloPeriod] -> Phylo -> Phylo
appendToPhyloPeriods l p = over (phylo_periods) (++ l) p
-- | To get all the PhyloPeriodIds of a Phylo
getPhyloPeriods :: Phylo -> [PhyloPeriodId]
getPhyloPeriods p = map _phylo_periodId
$ view (phylo_periods) p
-- | To get the id of a given PhyloPeriod
getPhyloPeriodId :: PhyloPeriod -> PhyloPeriodId
getPhyloPeriodId prd = _phylo_periodId prd
-- | To create a PhyloPeriod
initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
--------------------
-- | PhyloLevel | --
--------------------
-- | To alter a list of PhyloLevels following a given function
alterPhyloLevels :: ([PhyloLevel] -> [PhyloLevel]) -> Phylo -> Phylo
alterPhyloLevels f p = over ( phylo_periods
. traverse
. phylo_periodLevels) f p
-- | To get the PhylolevelId of a given PhyloLevel
getPhyloLevelId :: PhyloLevel -> PhyloLevelId
getPhyloLevelId = _phylo_levelId
-- | To get all the Phylolevels of a given PhyloPeriod
getPhyloLevels :: PhyloPeriod -> [PhyloLevel]
getPhyloLevels = view (phylo_periodLevels)
-- | To create a PhyloLevel
initPhyloLevel :: PhyloLevelId -> [PhyloGroup] -> PhyloLevel
initPhyloLevel id groups = PhyloLevel id groups
-- | To set the LevelId of a PhyloLevel and of all its PhyloGroups
setPhyloLevelId :: Int -> PhyloLevel -> PhyloLevel
setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups)
= PhyloLevel (id, lvl') groups'
where
groups' = over (traverse . phylo_groupId)
(\((period, _lvl), idx) -> ((period, lvl'), idx))
groups
------------------
-- | PhyloFis | --
------------------
-- | To get the clique of a PhyloFis
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
-- | To get the period of a PhyloFis
getFisPeriod :: PhyloFis -> (Date,Date)
getFisPeriod = _phyloFis_period
----------------------------
-- | PhyloNodes & Edges | --
----------------------------
-- | To alter a PhyloNode
alterPhyloNode :: (PhyloNode -> PhyloNode) -> PhyloView -> PhyloView
alterPhyloNode f v = over ( pv_nodes
. traverse
) (\pn -> f pn ) v
-- | To filter some GroupEdges with a given threshold
filterGroupEdges :: Double -> [GroupEdge] -> [GroupEdge]
filterGroupEdges thr edges = filter (\((_s,_t),w) -> w > thr) edges
-- | To get the neighbours (directed/undirected) of a PhyloGroup from a list of GroupEdges
getNeighbours :: Bool -> PhyloGroup -> [GroupEdge] -> [PhyloGroup]
getNeighbours directed g e = case directed of
True -> map (\((_s,t),_w) -> t)
$ filter (\((s,_t),_w) -> s == g) e
False -> map (\((s,t),_w) -> (head' "getNeighbours") $ delete g $ nub [s,t,g])
$ filter (\((s,t),_w) -> s == g || t == g) e
-- | To get the PhyloBranchId of PhyloNode if it exists
getNodeBranchId :: PhyloNode -> PhyloBranchId
getNodeBranchId n = case n ^. pn_bid of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeBranchId] branchId not found"
Just i -> i
-- | To get the PhyloGroupId of a PhyloNode
getNodeId :: PhyloNode -> PhyloGroupId
getNodeId n = n ^. pn_id
getNodePeriod :: PhyloNode -> (Date,Date)
getNodePeriod n = fst $ fst $ getNodeId n
-- | To get the Level of a PhyloNode
getNodeLevel :: PhyloNode -> Level
getNodeLevel n = (snd . fst) $ getNodeId n
-- | To get the Parent Node of a PhyloNode in a PhyloView
getNodeParent :: PhyloNode -> PhyloView -> [PhyloNode]
getNodeParent n v = filter (\n' -> elem (getNodeId n') (getNodeParentsId n))
$ v ^. pv_nodes
-- | To get the Parent Node id of a PhyloNode if it exists
getNodeParentsId :: PhyloNode -> [PhyloGroupId]
getNodeParentsId n = case n ^. pn_parents of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeParentsId] node parent not found"
Just ids -> ids
-- | To get a list of PhyloNodes grouped by PhyloBranch in a PhyloView
getNodesByBranches :: PhyloView -> [(PhyloBranchId,[PhyloNode])]
getNodesByBranches v = zip bIds $ map (\id -> filter (\n -> (getNodeBranchId n) == id)
$ getNodesInBranches v ) bIds
where
--------------------------------------
bIds :: [PhyloBranchId]
bIds = getViewBranchIds v
--------------------------------------
-- | To get a list of PhyloNodes owned by any PhyloBranches in a PhyloView
getNodesInBranches :: PhyloView -> [PhyloNode]
getNodesInBranches v = filter (\n -> isJust $ n ^. pn_bid)
$ v ^. pv_nodes
-- | To get the PhyloGroupId of the Source of a PhyloEdge
getSourceId :: PhyloEdge -> PhyloGroupId
getSourceId e = e ^. pe_source
-- | To get the PhyloGroupId of the Target of a PhyloEdge
getTargetId :: PhyloEdge -> PhyloGroupId
getTargetId e = e ^. pe_target
---------------------
-- | PhyloBranch | --
---------------------
-- | To get the PhyloBranchId of a PhyloBranch
getBranchId :: PhyloBranch -> PhyloBranchId
getBranchId b = b ^. pb_id
-- | To get a list of PhyloBranchIds
getBranchIds :: Phylo -> [PhyloBranchId]
getBranchIds p = sortOn snd
$ nub
$ mapMaybe getGroupBranchId
$ getGroups p
-- | To get a list of PhyloBranchIds given a Level in a Phylo
getBranchIdsWith :: Level -> Phylo -> [PhyloBranchId]
getBranchIdsWith lvl p = sortOn snd
$ mapMaybe getGroupBranchId
$ getGroupsWithLevel lvl p
-- | To get the Meta value of a PhyloBranch
getBranchMeta :: Text -> PhyloBranch -> [Double]
getBranchMeta k b = (b ^. pb_metrics) ! k
-- | To get all the PhyloBranchIds of a PhyloView
getViewBranchIds :: PhyloView -> [PhyloBranchId]
getViewBranchIds v = map getBranchId $ v ^. pv_branches
-- | To get a list of PhyloGroup sharing the same PhyloBranchId
getGroupsByBranches :: Phylo -> [(PhyloBranchId,[PhyloGroup])]
getGroupsByBranches p = zip (getBranchIds p)
$ map (\id -> filter (\g -> (fromJust $ getGroupBranchId g) == id)
$ getGroupsInBranches p)
$ getBranchIds p
-- | To get the sublist of all the PhyloGroups linked to a branch
getGroupsInBranches :: Phylo -> [PhyloGroup]
getGroupsInBranches p = filter (\g -> isJust $ g ^. phylo_groupBranchId)
$ getGroups p
--------------------------------
-- | PhyloQuery & QueryView | --
--------------------------------
-- | To filter PhyloView's Branches by level
filterBranchesByLevel :: Level -> PhyloView -> [PhyloBranch]
filterBranchesByLevel lvl pv = filter (\pb -> lvl == (fst $ pb ^. pb_id))
$ pv ^. pv_branches
-- | To filter PhyloView's Edges by level
filterEdgesByLevel :: Level -> [PhyloEdge] -> [PhyloEdge]
filterEdgesByLevel lvl pes = filter (\pe -> (lvl == ((snd . fst) $ pe ^. pe_source))
&& (lvl == ((snd . fst) $ pe ^. pe_target))) pes
-- | To filter PhyloView's Edges by type
filterEdgesByType :: EdgeType -> [PhyloEdge] -> [PhyloEdge]
filterEdgesByType t pes = filter (\pe -> t == (pe ^. pe_type)) pes
-- | To filter PhyloView's Nodes by the oldest Period
filterNodesByFirstPeriod :: [PhyloNode] -> [PhyloNode]
filterNodesByFirstPeriod pns = filter (\pn -> fstPrd == ((fst . fst) $ pn ^. pn_id)) pns
where
--------------------------------------
fstPrd :: (Date,Date)
fstPrd = (head' "filterNodesByFirstPeriod")
$ sortOn fst
$ map (\pn -> (fst . fst) $ pn ^. pn_id) pns
--------------------------------------
-- | To filter PhyloView's Nodes by Branch
filterNodesByBranch :: PhyloBranchId -> [PhyloNode] -> [PhyloNode]
filterNodesByBranch bId pns = filter (\pn -> if isJust $ pn ^. pn_bid
then if bId == (fromJust $ pn ^. pn_bid)
then True
else False
else False ) pns
-- | To filter PhyloView's Nodes by level
filterNodesByLevel :: Level -> [PhyloNode] -> [PhyloNode]
filterNodesByLevel lvl pns = filter (\pn -> lvl == ((snd . fst) $ pn ^. pn_id)) pns
-- | To filter PhyloView's Nodes by Period
filterNodesByPeriod :: PhyloPeriodId -> [PhyloNode] -> [PhyloNode]
filterNodesByPeriod prd pns = filter (\pn -> prd == ((fst . fst) $ pn ^. pn_id)) pns
-- | To get the first clustering method to apply to get the contextual units of a Phylo
getContextualUnit :: PhyloQueryBuild -> Cluster
getContextualUnit q = q ^. q_contextualUnit
-- | To get the metrics to apply to contextual units
getContextualUnitMetrics :: PhyloQueryBuild -> [Metric]
getContextualUnitMetrics q = q ^. q_contextualUnitMetrics
-- | To get the filters to apply to contextual units
getContextualUnitFilters :: PhyloQueryBuild -> [Filter]
getContextualUnitFilters q = q ^. q_contextualUnitFilters
-- | To get the cluster methods to apply to the Nths levels of a Phylo
getNthCluster :: PhyloQueryBuild -> Cluster
getNthCluster q = q ^. q_nthCluster
-- | To get the Sup Level of a reconstruction of a Phylo from a PhyloQuery
getNthLevel :: PhyloQueryBuild -> Level
getNthLevel q = q ^. q_nthLevel
-- | To get the Grain of the PhyloPeriods from a PhyloQuery
getPeriodGrain :: PhyloQueryBuild -> Int
getPeriodGrain q = q ^. q_periodGrain
-- | To get the intertemporal matching strategy to apply to a Phylo from a PhyloQuery
getInterTemporalMatching :: PhyloQueryBuild -> Proximity
getInterTemporalMatching q = q ^. q_interTemporalMatching
-- | To get the Steps of the PhyloPeriods from a PhyloQuery
getPeriodSteps :: PhyloQueryBuild -> Int
getPeriodSteps q = q ^. q_periodSteps
--------------------------------------------------
-- | PhyloQueryBuild & PhyloQueryView Constructors | --
--------------------------------------------------
-- | To get the threshold of a Proximity
getThreshold :: Proximity -> Double
getThreshold prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> thr
Hamming (HammingParams thr) -> thr
Filiation -> panic "[ERR][Viz.Phylo.Tools.getThreshold] Filiation"
-- | To get the Proximity associated to a given Clustering method
getProximity :: Cluster -> Proximity
getProximity cluster = case cluster of
Louvain (LouvainParams proxi) -> proxi
RelatedComponents (RCParams proxi) -> proxi
_ -> panic "[ERR][Viz.Phylo.Tools.getProximity] this cluster has no associated Proximity"
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 0 -> min') (def 0 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
initLonelyBranch :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initSizeBranch :: Maybe Int -> SBParams
initSizeBranch (def 1 -> minSize) = SBParams minSize
initLonelyBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch' (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
initRelatedComponents :: Maybe Proximity -> RCParams
initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
-- | TODO user param in main function
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Int -> Maybe Cluster -> Maybe [Metric]
-> Maybe [Filter]-> Maybe Proximity -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain)
(def 1 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain
steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
initPhyloQueryView :: Maybe Level -> Maybe Filiation -> Maybe Bool -> Maybe Level -> Maybe [Metric] -> Maybe [Filter] -> Maybe [Tagger] -> Maybe (Sort, Order) -> Maybe ExportMode -> Maybe DisplayMode -> Maybe Bool -> PhyloQueryView
initPhyloQueryView (def 2 -> lvl) (def Descendant -> f) (def False -> c) (def 1 -> d) (def [] -> ms) (def [] -> fs) (def [] -> ts) s (def Json -> em) (def Flat -> dm) (def True -> v) =
PhyloQueryView lvl f c d ms fs ts s em dm v
-- | To define some obvious boolean getters
shouldKeepMinorFis :: FisParams -> Bool
shouldKeepMinorFis = _fis_keepMinorFis
----------------------------
-- | Default ressources | --
----------------------------
-- Clusters
defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing)
defaultRelatedComponents :: Cluster
defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing)
-- Filters
defaultLonelyBranch :: Filter
defaultLonelyBranch = LonelyBranch (initLonelyBranch Nothing Nothing Nothing)
defaultSizeBranch :: Filter
defaultSizeBranch = SizeBranch (initSizeBranch Nothing)
-- Params
defaultPhyloParam :: PhyloParam
defaultPhyloParam = initPhyloParam Nothing Nothing Nothing
-- Proximities
defaultHamming :: Proximity
defaultHamming = Hamming (initHamming Nothing)
defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing)
-- Queries
type Title = Text
type Desc = Text
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = defaultQueryBuild'
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
defaultQueryBuild' :: Title -> Desc -> PhyloQueryBuild
defaultQueryBuild' t d = initPhyloQueryBuild t d
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing
-- Software
defaultSoftware :: Software
defaultSoftware = Software "Gargantext" "v4"
-- Version
defaultPhyloVersion :: Text
defaultPhyloVersion = "v1"
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Display
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,(++),sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To transform a flat Phyloview into a nested Phyloview
toNestedView :: [PhyloNode] -> [PhyloNode] -> [PhyloNode]
toNestedView ns ns'
| null ns' = ns
| otherwise = toNestedView (filter (\n -> lvl' == getNodeLevel n) nested)
(filter (\n -> lvl' < getNodeLevel n) nested)
where
--------------------------------------
lvl' :: Level
lvl' = getNodeLevel $ head' "toNestedView" nested
--------------------------------------
nested :: [PhyloNode]
nested = foldl (\ns'' n -> let nIds' = getNodeParentsId n
in map (\n' -> if elem (getNodeId n') nIds'
then n' & pn_childs %~ (++ [n])
else n') ns'') ns' ns
--------------------------------------
-- | To process a DisplayMode to a PhyloView
processDisplay :: DisplayMode -> ExportMode -> PhyloView -> PhyloView
processDisplay d e v = case e of
Json -> case d of
Flat -> v
Nested -> let ns = sortOn getNodeLevel $ v ^. pv_nodes
lvl = getNodeLevel $ head' "processDisplay" ns
in v & pv_nodes .~ toNestedView (filter (\n -> lvl == getNodeLevel n) ns)
(filter (\n -> lvl < getNodeLevel n) ns)
_ -> v
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Export
where
import Control.Lens hiding (Level)
import Control.Monad
import Data.GraphViz hiding (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++),unwords,concat,sortOn,nub)
import Data.Map (Map,toList,(!))
import Data.Maybe (isNothing)
import Data.Text.Lazy (fromStrict, pack, unpack)
import qualified Data.Text as T
import qualified Data.Text.Lazy as T'
import qualified Data.GraphViz.Attributes.HTML as H
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo hiding (Dot)
import Gargantext.Core.Viz.Phylo.Tools
-- import Debug.Trace (trace)
import Prelude (writeFile)
import System.FilePath
type DotId = T'.Text
---------------------
-- | Dot to File | --
---------------------
dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath dotG = writeFile filePath $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
--------------------------
-- | PhyloView to DOT | --
--------------------------
-- | From http://haroldcarr.com/posts/2014-02-28-using-graphviz-via-haskell.html & https://hackage.haskell.org/package/graphviz
-- | To create a custom Graphviz's Attribute
setAttr :: AttributeName -> T'.Text -> CustomAttribute
setAttr k v = customAttribute k v
-- | To create customs Graphviz's Attributes out of some Metrics
setAttrFromMetrics :: Map T.Text [Double] -> [CustomAttribute]
setAttrFromMetrics a = map (\(k,v) -> setAttr (fromStrict k)
$ (pack . unwords)
$ map show v) $ toList a
-- | To transform a PhyloBranchId into a DotId
toBranchDotId :: PhyloBranchId -> DotId
toBranchDotId (lvl,idx) = fromStrict $ T.pack $ (show lvl) ++ (show idx)
-- | To transform a PhyloGroupId into a DotId
toNodeDotId :: PhyloGroupId -> DotId
toNodeDotId (((d,d'),lvl),idx) = fromStrict $ T.pack $ (show d) ++ (show d') ++ (show lvl) ++ (show idx)
-- | To transform a PhyloPeriodId into a DotId
toPeriodDotId :: PhyloPeriodId -> DotId
toPeriodDotId (d,d') = fromStrict $ T.pack $ (show d) ++ (show d')
-- | To transform a PhyloPeriodId into a Graphviz's label
toPeriodDotLabel ::PhyloPeriodId -> Label
toPeriodDotLabel (d,d') = toDotLabel $ T.pack $ (show d) ++ " " ++ (show d')
-- | To get all the Phyloperiods covered by a PhyloView
getViewPeriods :: PhyloView -> [PhyloPeriodId]
getViewPeriods pv = sortOn fst $ nub $ map (\pn -> (fst . fst) $ pn ^. pn_id) $ pv ^. pv_nodes
-- | To get for each PhyloBranch, their corresponding oldest PhyloNodes
getFirstNodes :: Level -> PhyloView -> [(PhyloBranchId,PhyloGroupId)]
getFirstNodes lvl pv = concat
$ map (\bId -> map (\pn -> (bId,pn ^. pn_id))
$ filterNodesByFirstPeriod
$ filterNodesByBranch bId
$ filterNodesByLevel lvl
$ pv ^. pv_nodes) bIds
where
--------------------------------------
bIds :: [PhyloBranchId]
bIds = map getBranchId $ filterBranchesByLevel lvl pv
--------------------------------------
-- | To transform a Text into a Graphviz's Label
toDotLabel :: T.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
-- | To set a Peak Node
setPeakDotNode :: PhyloBranch -> Dot DotId
setPeakDotNode pb = node (toBranchDotId $ pb ^. pb_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ pb ^. pb_peak)]
<> (setAttrFromMetrics $ pb ^. pb_metrics)
<> [ setAttr "nodeType" "peak"
, setAttr "branchId" ((pack $ show (fst $ getBranchId pb)) <> (pack $ show (snd $ getBranchId pb)))
])
-- | To set a Peak Edge
setPeakDotEdge :: DotId -> DotId -> Dot DotId
setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])]
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
getGroupDynamic :: [Double] -> H.Attribute
getGroupDynamic dy
| elem 0 dy = colorFromDynamics 0
| elem 1 dy = colorFromDynamics 1
| elem 2 dy = colorFromDynamics 2
| otherwise = colorFromDynamics 3
-- | To set an HTML table
setHtmlTable :: PhyloNode -> H.Label
setHtmlTable pn = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (fromJust $ pn ^. pn_ngrams) $ zip dynamics inclusion) }
where
--------------------------------------
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> H.LabelCell [H.Align H.HLeft,colorFromDynamics d]
$ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
inclusion :: [Double]
inclusion = (pn ^. pn_metrics) ! "inclusion"
--------------------------------------
dynamics :: [Double]
dynamics = (pn ^. pn_metrics) ! "dynamics"
--------------------------------------
header :: H.Row
header = H.Cells [H.LabelCell [getGroupDynamic dynamics]
$ H.Text [H.Str $ (((fromStrict . T.toUpper) $ pn ^. pn_label)
<> (fromStrict " ( ")
<> (pack $ show (fst $ getNodePeriod pn))
<> (fromStrict " , ")
<> (pack $ show (snd $ getNodePeriod pn))
<> (fromStrict " ) "))]]
--------------------------------------
-- | To set a Node
setDotNode :: PhyloNode -> Dot DotId
setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
([FontName "Arial", Shape Square, toLabel (setHtmlTable pn)]
<> [ setAttr "nodeType" "group"
, setAttr "from" (pack $ show (fst $ getNodePeriod pn))
, setAttr "to" (pack $ show (fst $ getNodePeriod pn))
, setAttr "branchId" ((pack $ show (fst $ getNodeBranchId pn)) <> (pack $ show (snd $ getNodeBranchId pn)))
])
-- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black], Constraint True]
-- | To set a Period Edge
setDotPeriodEdge :: (PhyloPeriodId,PhyloPeriodId) -> Dot DotId
setDotPeriodEdge (prd,prd') = edge (toPeriodDotId prd) (toPeriodDotId prd') [Width 5, Color [toWColor Black]]
-- | To transform a given PhyloView into the corresponding GraphViz Graph (ie: Dot format)
viewToDot :: PhyloView -> DotGraph DotId
viewToDot pv = digraph ((Str . fromStrict) $ pv ^. pv_title)
$ do
-- set the global graph attributes
graphAttrs ( [Label (toDotLabel $ pv ^. pv_title)]
<> [setAttr "description" $ fromStrict $ pv ^. pv_description]
<> [setAttr "filiation" $ (pack . show) $ pv ^. pv_filiation]
<> (setAttrFromMetrics $ pv ^. pv_metrics)
<> [FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]])
-- set the peaks
subgraph (Str "Peaks") $ do
graphAttrs [Rank SameRank]
mapM setPeakDotNode $ filterBranchesByLevel (pv ^. pv_level) pv
-- set the nodes, period by period
_ <- mapM (\prd ->
subgraph (Str $ fromStrict $ T.pack $ "subGraph " ++ (show $ (fst prd)) ++ (show $ (snd prd)))
$ do
graphAttrs [Rank SameRank]
-- set the period label
node (toPeriodDotId prd) ([Shape Square, FontSize 50, Label (toPeriodDotLabel prd)]
<> [setAttr "nodeType" "period",
setAttr "from" (fromStrict $ T.pack $ (show $ fst prd)),
setAttr "to" (fromStrict $ T.pack $ (show $ snd prd))])
mapM setDotNode $ filterNodesByPeriod prd $ filterNodesByLevel (pv ^. pv_level) (pv ^.pv_nodes)
) $ (pv ^. pv_periods)
-- set the edges : from peaks to nodes, from nodes to nodes, from periods to periods
_ <- mapM (\(bId,nId) -> setPeakDotEdge (toBranchDotId bId) (toNodeDotId nId)) $ getFirstNodes (pv ^. pv_level) pv
_ <- mapM setDotEdge $ filterEdgesByLevel (pv ^. pv_level) $ filterEdgesByType PeriodEdge (pv ^. pv_edges)
mapM setDotPeriodEdge $ listToSequentialCombi $ (pv ^. pv_periods)
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Filters
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (notElem,null,nub,(\\),intersect)
import Data.Maybe (isNothing)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To clean a PhyloView list of Nodes, Edges, etc after having filtered its Branches
cleanNodesEdges :: PhyloView -> PhyloView -> PhyloView
cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n) nIds))
& pv_nodes %~ (map (\n -> if isNothing (n ^. pn_parents)
then n
else if (not .null) $ (getNodeParentsId n) `intersect` nIds
then n & pn_parents .~ Nothing
else n ))
& pv_edges %~ (filter (\e -> (not $ elem (getSourceId e) nIds)
&& (not $ elem (getTargetId e) nIds)))
where
--------------------------------------
nIds :: [PhyloGroupId]
nIds = map getNodeId
$ filter (\n -> elem (getNodeBranchId n) bIds)
$ getNodesInBranches v
--------------------------------------
bIds :: [PhyloBranchId]
bIds = (getViewBranchIds v) \\ (getViewBranchIds v')
--------------------------------------
-- | To filter all the LonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterLonelyBranch inf sup min' prds v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> let
ns = filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v
prds' = nub $ map (\n -> (fst . fst) $ getNodeId n) ns
in not (isLone ns prds')))
--------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= min')
&& notElem (head' "filterLonelyBranch1" prds') (take inf prds)
&& notElem (head' "filterLonelyBranch2" prds') (take sup $ reverse prds)
--------------------------------------
-- | To filter all the branches with a minimal size in a PhyloView
filterSizeBranch :: Int -> PhyloView -> PhyloView
filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of
LonelyBranch (LBParams inf sup min') -> filterLonelyBranch inf sup min' (getPhyloPeriods p) v'
SizeBranch (SBParams min') -> filterSizeBranch min' v'
-- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found"
) v fs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Metrics
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (last,groupBy,sortOn)
import Data.Map (insert)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To add a new meta Metric to a PhyloBranch
addBranchMetrics :: PhyloBranchId -> Text -> Double -> PhyloView -> PhyloView
addBranchMetrics id lbl val v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_metrics %~ insert lbl [val]
else b) v
branchGroups :: PhyloView -> PhyloView
branchGroups v = foldl (\v' (bId,nb) -> addBranchMetrics bId "nbGroups" nb v') v
$ map (\(bId,ns) -> (bId,fromIntegral $ length ns))
$ getNodesByBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchAge :: PhyloView -> PhyloView
branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "age" ((abs . fromIntegral) $ ((snd . last) prds) - (fst $ head' "branchAge" prds)) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
BranchGroups -> branchGroups v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Sort
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To sort a PhyloView by Age
sortBranchByAge :: Order -> PhyloView -> PhyloView
sortBranchByAge o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "age") xs
Desc -> reverse $ sortOn (getBranchMeta "age") xs
--------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of
Nothing -> v
Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Taggers
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), union, (\\))
import Data.Text (Text)
import Data.Vector (Vector)
import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.Metrics
import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
mostFreqNgrams :: Int -> [PhyloGroup] -> [Int]
mostFreqNgrams thr groups = map fst
$ take thr
$ reverse
$ sortOn snd
$ map (\g -> (head' "mostFreqNgrams" g,length g))
$ groupBy (==)
$ (sort . concat)
$ map getGroupNgrams groups
-- | To transform the nth most frequent Ngrams into a label
freqToLabel :: Int -> Vector Ngrams -> [PhyloGroup] -> Text
freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s])
$ take nth
$ reverse $ sortOn snd
$ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch
alterBranchPeak :: (PhyloBranchId,Text) -> PhyloView -> PhyloView
alterBranchPeak (id,lbl) v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_peak .~ lbl
else b) v
-- | To set the peak of a PhyloBranch as the nth most frequent terms of its PhyloNodes
branchPeakFreq :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, freqToLabel thr (getFoundationsRoots p)
$ getGroupsFromNodes ns p))
$ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
BranchPeakCooc -> branchPeakCooc v' 2 p
BranchPeakInc -> branchPeakInc v' 2 p
GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.ViewMaker
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,(++),sort)
import Data.Text (Text)
import Data.Map (Map, empty, elems, unionWithKey, fromList)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.View.Display
import Gargantext.Core.Viz.Phylo.View.Filters
import Gargantext.Core.Viz.Phylo.View.Metrics
import Gargantext.Core.Viz.Phylo.View.Sort
import Gargantext.Core.Viz.Phylo.View.Taggers
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | To init a PhyloBranch
initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch
initPhyloBranch id lbl = PhyloBranch id lbl empty
-- | To init a PhyloEdge
initPhyloEdge :: PhyloGroupId -> [Pointer] -> EdgeType -> [PhyloEdge]
initPhyloEdge id pts et = map (\pt -> PhyloEdge id (fst pt) et (snd pt)) pts
-- | To init a PhyloView
initPhyloView :: Level -> Text -> Text -> Filiation -> Bool -> Phylo -> PhyloView
initPhyloView lvl lbl dsc fl vb p = PhyloView (getPhyloParams p) lbl dsc fl lvl
(getPhyloPeriods p)
empty
([] ++ (phyloToBranches lvl p))
([] ++ (groupsToNodes True vb (getFoundationsRoots p) gs))
([] ++ (groupsToEdges fl PeriodEdge gs))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloNodes
groupsToNodes :: Bool -> Bool -> Vector Ngrams -> [PhyloGroup] -> [PhyloNode]
groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
in PhyloNode
(getGroupId g)
(getGroupBranchId g)
"" idxs
(if isV
then Just (ngramsToText ns idxs)
else Nothing)
(g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR)
then Just (getGroupLevelParentsId g)
else Nothing)
[]
) gs
-- | To merge edges by keeping the maximum weight
mergeEdges :: [PhyloEdge] -> [PhyloEdge] -> [PhyloEdge]
mergeEdges lAsc lDes = elems
$ unionWithKey (\_k vAsc vDes -> vDes & pe_weight .~ (max (vAsc ^. pe_weight) (vDes ^. pe_weight))) mAsc mDes
where
--------------------------------------
mAsc :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mAsc = fromList
$ map (\(k,e) -> (k, e & pe_source .~ fst k
& pe_target .~ snd k))
$ zip (map (\e -> (e ^. pe_target,e ^. pe_source)) lAsc) lAsc
--------------------------------------
mDes :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mDes = fromList
$ zip (map (\e -> (e ^. pe_source,e ^. pe_target)) lDes) lDes
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloEdges
groupsToEdges :: Filiation -> EdgeType -> [PhyloGroup] -> [PhyloEdge]
groupsToEdges fl et gs = case fl of
Complete -> (groupsToEdges Ascendant et gs) ++ (groupsToEdges Descendant et gs)
Merge -> mergeEdges (groupsToEdges Ascendant et gs) (groupsToEdges Descendant et gs)
_ -> concat
$ map (\g -> case fl of
Ascendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodParents g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelParents g) et
Descendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodChilds g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelChilds g) et
_Type -> panic "[ERR][Viz.Phylo.View.ViewMaker.groupsToEdges] not implemented"
) gs
-- | To transform a Phylo into a list of PhyloBranch for a given Level
phyloToBranches :: Level -> Phylo -> [PhyloBranch]
phyloToBranches lvl p = map (\id -> initPhyloBranch id "") $ nub $ getBranchIdsWith lvl p
-- | To add recursively a list of PhyloNodes and Edges to PhyloView from a given Level and Depth
addChildNodes :: Bool -> Level -> Level -> Bool -> Filiation -> Phylo -> PhyloView -> PhyloView
addChildNodes shouldDo lvl lvlMin vb fl p v =
if (not shouldDo) || (lvl == lvlMin)
then v
else addChildNodes shouldDo (lvl - 1) lvlMin vb fl p
$ v & pv_branches %~ (++ (phyloToBranches (lvl - 1) p))
& pv_nodes %~ (++ (groupsToNodes False vb (getFoundationsRoots p) gs'))
& pv_edges %~ (++ (groupsToEdges fl PeriodEdge gs'))
& pv_edges %~ (++ (groupsToEdges Descendant LevelEdge gs ))
& pv_edges %~ (++ (groupsToEdges Ascendant LevelEdge gs'))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel (lvl - 1) p
--------------------------------------
-- | To transform a PhyloQuery into a PhyloView
toPhyloView :: PhyloQueryView -> Phylo -> PhyloView
toPhyloView q p = traceView
$ processDisplay (q ^. qv_display) (q ^. qv_export)
$ processSort (q ^. qv_sort ) p
$ processTaggers (q ^. qv_taggers) p
$ processDynamics
$ processFilters (q ^. qv_filters) p
$ processMetrics (q ^. qv_metrics) p
$ addChildNodes (q ^. qv_levelChilds) (q ^. qv_lvl) (q ^. qv_levelChildsDepth) (q ^. qv_verbose) (q ^. qv_filiation) p
$ initPhyloView (q ^. qv_lvl) (getPhyloTitle p) (getPhyloDescription p) (q ^. qv_filiation) (q ^. qv_verbose) p
-----------------
-- | Taggers | --
-----------------
traceView :: PhyloView -> PhyloView
traceView pv = trace ("------------\n--| View |--\n------------\n\n"
<> "view level : " <> show (pv ^. pv_level) <> "\n"
<> show (length $ pv ^. pv_branches) <> " exported branches with " <> show (length $ pv ^. pv_nodes) <> " groups\n"
<> "groups by branches : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") pv
where
lst = sort $ map (fromIntegral . length . snd) $ getNodesByBranches pv
......@@ -23,7 +23,7 @@ module Gargantext.Database.Admin.Types.Hyperdata.Phylo
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Core.Viz.Phylo (Phylo(..))
import Gargantext.Core.Viz.LegacyPhylo (Phylo(..))
------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment