Commit e26527ac authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge dev-phylo

parents a2fd553a 3e6c4d4a
......@@ -17,11 +17,10 @@ Adaptative Phylo binaries
module Main where
import Data.Aeson
import Data.ByteString.Lazy (ByteString)
import Data.Maybe (isJust, fromJust)
import Data.List (concat, nub, isSuffixOf, take)
import Data.List (concat, nub, isSuffixOf)
import Data.String (String)
import Data.Text (Text, unwords, unpack)
import Data.Text (Text, unwords, unpack, replace)
import Crypto.Hash.SHA256 (hash)
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
......@@ -31,20 +30,25 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicDistance')
import GHC.IO (FilePath)
import Prelude (Either(..))
import Prelude (Either(Left, Right))
import System.Environment
import System.Directory (listDirectory)
import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
......@@ -66,7 +70,7 @@ getFilesFromPath path = do
-- | To read and decode a Json file
readJson :: FilePath -> IO ByteString
readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
......@@ -125,6 +129,92 @@ fileToDocs parser path lst = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
sensToLabel :: Config -> [Char]
sensToLabel config = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: Config -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: Config -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
qualToConfig :: Config -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
--------------
-- | Main | --
--------------
......@@ -148,9 +238,45 @@ main = do
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo"
let phylo = toPhylo corpus mapList config
printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep
-- let phylo = toPhylo (setConfig config phyloStep)
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
writePhylo phyloWithLinksFile phyloWithLinks
-- | probes
......@@ -162,33 +288,8 @@ main = do
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport phylo
let clq = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s -> "clique_" <> (show s)
let sensibility = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "-" <> clq
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot"
let dot = toPhyloExport (setConfig config phyloWithLinks)
let output = configToLabel config
dotToFile output dot
......@@ -89,17 +89,11 @@ library:
- Gargantext.Core.Text.Flow
- Gargantext.Core.Viz.Graph
- Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.Phylo
- Gargantext.Core.Viz.AdaptativePhylo
- Gargantext.Core.Viz.Phylo.PhyloMaker
- Gargantext.Core.Viz.Phylo.Tools
- Gargantext.Core.Viz.Phylo.PhyloTools
- Gargantext.Core.Viz.Phylo.PhyloExport
- Gargantext.Core.Viz.Phylo.SynchronicClustering
- Gargantext.Core.Viz.Phylo.Example
- Gargantext.Core.Viz.Phylo.LevelMaker
- Gargantext.Core.Viz.Phylo.View.Export
- Gargantext.Core.Viz.Phylo.View.ViewMaker
- Gargantext.Core.Viz.Types
dependencies:
- HSvm
......@@ -135,6 +129,7 @@ library:
- conduit-extra
- containers
- contravariant
- cryptohash
- crawlerHAL
- crawlerISTEX
- crawlerIsidore
......@@ -312,31 +307,6 @@ executables:
- unordered-containers
- full-text-search
gargantext-phylo:
main: Main.hs
source-dirs: bin/gargantext-phylo
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- aeson
- async
- base
- bytestring
- containers
- directory
- gargantext
- vector
- parallel
- cassava
- ini
- optparse-generic
- split
- unordered-containers
gargantext-adaptative-phylo:
main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo
......@@ -360,7 +330,8 @@ executables:
- ini
- optparse-generic
- split
- unordered-containers
- unordered-containers
- cryptohash
gargantext-import:
main: Main.hs
......
......@@ -66,7 +66,7 @@ import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Tree (tree, TreeMode(..))
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI (PhyloAPI, phyloAPI)
import qualified Gargantext.API.Node.Share as Share
import qualified Gargantext.API.Node.Update as Update
import qualified Gargantext.API.Search as Search
......
......@@ -61,17 +61,18 @@ import Data.Set (Set)
import Data.Set (fromList, toList, isSubsetOf)
import Data.Graph.Inductive hiding (Graph, neighbors, subgraph, (&))
import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph', Threshold)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.AdaptativePhylo
-- import Debug.Trace (trace)
type Graph = Graph_Undirected
type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
getMaxCliques :: Ord a => Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques d t m = map fromIndices $ getMaxCliques' t m'
getMaxCliques :: Ord a => CliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where
m' = toIndex to m
(to,from) = createIndices m
......@@ -81,7 +82,10 @@ getMaxCliques d t m = map fromIndices $ getMaxCliques' t m'
getMaxCliques' t' n = maxCliques graph
where
graph = mkGraphUfromEdges (Map.keys n')
n' = cooc2graph' d t' n
-- n' = cooc2graph' d t' n
n' = case f of ByThreshold -> cooc2graph' d t' n
ByNeighbours -> cooc2graph'' d t' n
maxCliques :: Graph -> [[Node]]
maxCliques g = map (\n -> subMaxCliques g (n:ns)) ns & concat & takeMax
......
......@@ -72,6 +72,15 @@ data Proximity =
-- , _wlj_elevation :: Double
-}
}
| WeightedLogSim
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| Hamming
deriving (Show,Generic,Eq)
......@@ -99,13 +108,16 @@ data TimeUnit =
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _mcl_size :: Int }
{ _mcl_size :: Int
, _mcl_threshold :: Double
, _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq)
......@@ -124,6 +136,7 @@ data Config =
, phyloLevel :: Int
, phyloProximity :: Proximity
, seaElevation :: SeaElevation
, findAncestors :: Bool
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
......@@ -144,10 +157,11 @@ defaultConfig =
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, seaElevation = Constante 0.1 0.1
, phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 100 1
, findAncestors = True
, phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0 1
, timeUnit = Year 3 1 5
, clique = MaxClique 0
, clique = MaxClique 0 3 ByNeighbours
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
......@@ -163,6 +177,8 @@ instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON CliqueFilter
instance ToJSON CliqueFilter
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
......@@ -423,5 +439,17 @@ makeLenses ''PhyloBranch
-- | JSON instances | --
------------------------
instance FromJSON Phylo
instance ToJSON Phylo
instance FromJSON PhyloParam
instance ToJSON PhyloParam
instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel
instance ToJSON PhyloLevel
instance FromJSON Software
instance ToJSON Software
instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
......@@ -10,8 +10,7 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Graph
where
......
......@@ -71,6 +71,37 @@ cooc2graphWith :: PartitionMethod
cooc2graphWith Louvain = cooc2graphWith' (cLouvain "1")
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
cooc2graph'' :: Ord t => Distance
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
cooc2graph'' distance threshold myCooc = neighbouMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat Triangular 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
neighbouMap = filterByNeighbours threshold
$ mat2map distanceMat
-- Quentin
filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
filterByNeighbours threshold distanceMap = filteredMap
where
indexes :: [Index]
indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
filteredMap :: Map (Index, Index) Double
filteredMap = Map.fromList
$ List.concat
$ map (\idx ->
let selected = List.reverse
$ List.sortOn snd
$ Map.toList
$ Map.filter (> 0)
$ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
in List.take (round threshold) selected
) indexes
cooc2graphWith' :: ToComId a
=> Partitions a
......@@ -129,6 +160,49 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
myCooc' bridgeness' confluence' partitions
-- cooc2graph :: Distance
-- -> Threshold
-- -> (Map (Text, Text) Int)
-- -> IO Graph
-- cooc2graph distance threshold myCooc = do
-- printDebug "cooc2graph" distance
-- let
-- -- TODO remove below
-- theMatrix = Map.fromList $ HashMap.toList myCooc
-- (ti, _) = createIndices theMatrix
-- myCooc' = toIndex ti theMatrix
-- matCooc = map2mat 0 (Map.size ti)
-- $ Map.filterWithKey (\(a,b) _ -> a /= b)
-- $ Map.filter (> 1) myCooc'
-- distanceMat = measure distance matCooc
-- distanceMap = Map.filter (> threshold) $ mat2map distanceMat
-- nodesApprox :: Int
-- nodesApprox = n'
-- where
-- (as, bs) = List.unzip $ Map.keys distanceMap
-- n' = Set.size $ Set.fromList $ as <> bs
-- ClustersParams rivers _level = clustersParams nodesApprox
-- printDebug "Start" ("partitions" :: Text)
-- partitions <- if (Map.size distanceMap > 0)
-- -- then iLouvainMap 100 10 distanceMap
-- -- then hLouvain distanceMap
-- then doPartitions distanceMap
-- else panic "Text.Flow: DistanceMap is empty"
-- printDebug "End" ("partitions" :: Text)
-- let
-- -- bridgeness' = distanceMap
-- bridgeness' = trace ("Rivers: " <> show rivers)
-- $ bridgeness rivers partitions distanceMap
-- confluence' = confluence (Map.keys bridgeness') 3 True False
-- pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
-- myCooc' bridgeness' confluence' partitions
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double
......
......@@ -25,7 +25,7 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo where
module Gargantext.Core.Viz.LegacyPhylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
......@@ -261,11 +261,12 @@ data LouvainParams = LouvainParams
-- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams
| WeightedLogSim WLJParams
| Hamming HammingParams
| Filiation
deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard proximity
-- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
......
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Aggregates
where
import Control.Parallel.Strategies
import Gargantext.Prelude hiding (elem)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
---------------------
-- | Foundations | --
---------------------
-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | Documents | --
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
-----------------
-- | Periods | --
-----------------
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "initPeriods" l, last' "initPeriods" l))
$ chunkAlong g s [start .. end]
--------------
-- | Cooc | --
--------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToFullCombi $ fst x) l
where
--------------------------------------
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = fromList $ map (\x -> (x,0)) $ listToFullCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-------------
-- | Fis | --
-------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
-- | Tracers | --
-----------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
<> "support : " <> show (countSup 1 supps) <> " (>1) "
<> show (countSup 2 supps) <> " (>2) "
<> show (countSup 3 supps) <> " (>3) "
<> show (countSup 4 supps) <> " (>4) "
<> show (countSup 5 supps) <> " (>5) "
<> show (countSup 6 supps) <> " (>6)\n"
<> "clique size : " <> show (countSup 1 ngrms) <> " (>1) "
<> show (countSup 2 ngrms) <> " (>2) "
<> show (countSup 3 ngrms) <> " (>3) "
<> show (countSup 4 ngrms) <> " (>4) "
<> show (countSup 5 ngrms) <> " (>5) "
<> show (countSup 6 ngrms) <> " (>6)\n"
) m
where
--------------------------------------
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
--------------------------------------
supps :: [Double]
supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
--------------------------------------
ngrms :: [Double]
ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),sortOn,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | Try to connect a focused branch to other candidate branches by finding the best pointers
reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where
branches :: [[PhyloGroup]]
branches = elems
$ fromListWith (++)
$ foldl' (\mem g -> case getGroupBranchId g of
Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = branches ! (getGroupId g)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
branches :: Map PhyloGroupId Int
branches = graphToBranches (getGroupsWithLevel lvl p)
--------------------------------------
-- trace' bs = trace bs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo.Cluster
where
import Control.Parallel.Strategies
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
import Data.Map (Map, fromList, mapKeys)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
--------------
-- | Algo | --
--------------
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
idx e = case elemIndex e nodes of
Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
Just i -> i
--------------------------------------
-----------------------
-- | Cluster Maker | --
-----------------------
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ filter (\(g,g') -> g /= g')
$ listToDirectedCombi gs
-- | To transform a Graph into Clusters
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
filterGraph prox (ns,es) = case prox of
WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
_ -> undefined
-- | To clusterise a Phylo
phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl clus p = Map.fromList
$ zip periods
$ map (\g -> if null (fst g)
then []
else graphToClusters clus g) graphs'
where
--------------------------------------
graphs' :: [([GroupNode],[GroupEdge])]
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
--------------------------------------
periods :: [PhyloPeriodId]
periods = getPhyloPeriods p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " edges\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
This diff is collapsed.
......@@ -14,11 +14,11 @@ Portability : POSIX
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Phylo.API
module Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
-- import Data.Maybe (fromMaybe)
-- import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
......@@ -33,12 +33,12 @@ import Web.HttpApiData (readTextData)
import Gargantext.API.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Database.Query.Table.Node (insertNodes, node)
-- import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Main
import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Viz.LegacyPhylo
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
-- import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Types (TODO(..))
------------------------------------------------------------------------
......@@ -96,17 +96,20 @@ type GetPhylo = QueryParam "listId" ListId
-- Add real text processing
-- Fix Filter parameters
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
getPhylo _ _lId _ _ = undefined
-- getPhylo phId _lId l msb = do
-- phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
-- let
-- level = fromMaybe 2 l
-- branc = fromMaybe 2 msb
-- maybePhylo = phNode ^. (node_hyperdata . hp_data)
-- p <- liftBase $ viewPhylo2Svg
-- $ viewPhylo level branc
-- $ fromMaybe phyloFromQuery maybePhylo
-- pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
......@@ -137,9 +140,9 @@ putPhylo = undefined
-- | Instances
instance Arbitrary Phylo where arbitrary = elements [phylo]
-- instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
-- instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
......
......@@ -11,35 +11,37 @@ Portability : POSIX
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Main
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import Data.GraphViz
-- import Data.GraphViz
-- import qualified Data.ByteString as DB
import qualified Data.List as List
import Data.Maybe
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Prelude
import Gargantext.Database.Action.Flow
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Core.Types
-- import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
-- import Gargantext.Core.Viz.Phylo.Tools
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
type MinSizeBranch = Int
......@@ -91,31 +93,40 @@ flowPhylo' corpus terms l m fp = do
defaultQuery :: PhyloQueryBuild
defaultQuery = defaultQueryBuild'
"Default Title"
"Default Description"
defaultQuery = undefined
-- defaultQuery = defaultQueryBuild'
-- "Default Title"
-- "Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
buildPhylo' _ _ _ = undefined
-- buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
-- refactor 2021
-- queryView :: Level -> MinSizeBranch -> PhyloQueryView
-- queryView level _minSizeBranch = PhyloQueryView level Merge False 2
-- [BranchAge]
-- []
-- -- [SizeBranch $ SBParams minSizeBranch]
-- [BranchPeakFreq,GroupLabelCooc]
-- (Just (ByBranchAge,Asc))
-- Json Flat True
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView level _minSizeBranch = PhyloQueryView level Merge False 2
[BranchAge]
[]
-- [SizeBranch $ SBParams minSizeBranch]
[BranchPeakFreq,GroupLabelCooc]
(Just (ByBranchAge,Asc))
Json Flat True
queryView _level _minSizeBranch = undefined
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo l b phylo = toPhyloView (queryView l b) phylo
viewPhylo _l _b _phylo = undefined
-- viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
writePhylo _fp _phview = undefined
-- writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
viewPhylo2Svg :: PhyloView -> IO DB.ByteString
viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
This diff is collapsed.
This diff is collapsed.
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Metrics
where
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, union, intersect, null, (++), sort)
import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text)
-- import Debug.Trace (trace)
----------------
-- | Ngrams | --
----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams
getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ toList cooc
-------------------------
-- | Ngrams Dynamics | --
-------------------------
sharedWithParents :: Date -> PhyloBranchId -> Int -> PhyloView -> Bool
sharedWithParents inf bid n pv = elem n
$ foldl (\mem pn -> if ((bid == (fromJust $ (pn ^. pn_bid)))
&& (inf > (fst $ getNodePeriod pn)))
then nub $ mem ++ (pn ^. pn_idx)
else mem ) []
$ (pv ^. pv_nodes)
findDynamics :: Int -> PhyloView -> PhyloNode -> Map Int (Date,Date) -> Double
findDynamics n pv pn m =
let prd = getNodePeriod pn
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- emergence
then 2
else if ((fst prd) == (fst $ m ! n))
-- recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- decrease
then 1
else 3
processDynamics :: PhyloView -> PhyloView
processDynamics pv = alterPhyloNode (\pn ->
pn & pn_metrics %~ insert "dynamics" (map (\n -> findDynamics n pv pn ngramsDates) $ (pn ^. pn_idx) ) ) pv
where
--------------------------------------
ngramsDates :: Map Int (Date,Date)
ngramsDates = map (\ds -> let ds' = sort ds
in (head' "Dynamics" ds', last' "Dynamics" ds'))
$ fromListWith (++)
$ foldl (\mem pn -> mem ++ (map (\n -> (n, [fst $ getNodePeriod pn, snd $ getNodePeriod pn]))
$ (pn ^. pn_idx))) []
$ (pv ^. pv_nodes)
--------------------------------------
-------------------
-- | Proximity | --
-------------------
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
--------------------------------------
wInter :: [Double]
wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
--------------------------------------
wUnion :: [Double]
wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
where
--------------------------------------
inter :: Map (Int, Int) Double
inter = intersection f1 f2
--------------------------------------
......@@ -49,7 +49,7 @@ phyloDot = toPhyloExport phylo2
--------------------------------------------------
phylo2 :: Phylo
phylo2 = synchronicClustering phylo1
phylo2 = synchronicClustering $ toHorizon phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
......@@ -104,7 +104,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 }
, clique = MaxClique 0 15 ByNeighbours }
docs :: [Document]
......
......@@ -661,8 +661,8 @@ toPhyloExport phylo = exportToDot phylo
groups = traceExportGroups
$ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo
$ toHorizon phylo
$ tracePhyloInfo phylo
-- $ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
......@@ -670,12 +670,10 @@ traceExportBranches branches = trace ("\n"
<> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ("\n"
<> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
) groups
tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with β = "
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo
......
......@@ -24,6 +24,8 @@ import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
......@@ -49,20 +51,22 @@ toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel conf) $
if (phyloLevel conf) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phylo1 [2..(phyloLevel conf)]
toPhylo :: Phylo -> Phylo
toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel $ getConfig phyloStep) $
if (phyloLevel $ getConfig phyloStep) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel $ getConfig phyloStep)]
else phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
-- > AD to db here
phyloAncestors :: Phylo
phyloAncestors =
if (findAncestors $ getConfig phyloStep)
then toHorizon phylo1
else phylo1
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
phylo1 :: Phylo
phylo1 = toPhylo1 phyloStep
-- > AD to db here
--------------------------------------
......@@ -131,13 +135,21 @@ cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
[] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
Constante start gap -> constanteTemporalMatching start gap
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative steps -> adaptativeTemporalMatching steps
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
toPhylo1 :: Phylo -> Phylo
toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
Constante start gap -> constanteTemporalMatching start gap phyloStep
Adaptative steps -> adaptativeTemporalMatching steps phyloStep
-----------------------
-- | To Phylo Step | --
-----------------------
-- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative _ -> toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
......@@ -145,9 +157,10 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-- docs' = groupDocsByPeriod' date (getPeriodIds phyloBase) docs
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
--------------------------------------
---------------------------
-- | Frequent Item Set | --
......@@ -196,8 +209,8 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ filterClique True s (filterCliqueBySupport)
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
MaxClique s -> filterClique True s (filterCliqueBySize)
phyloClique
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
phyloClique
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
......@@ -209,13 +222,13 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ ->
MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......
......@@ -225,12 +225,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ -> 0
MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s -> s
MaxClique s _ _ -> s
--------------
......@@ -295,12 +295,14 @@ filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ -> local >= thr
WeightedLogSim _ -> local >= thr
Hamming -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
WeightedLogJaccard _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim"
Hamming -> "Hamming"
---------------
......@@ -344,6 +346,16 @@ getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
setConfig :: Config -> Phylo -> Phylo
setConfig config phylo = phylo
& phylo_param .~ (PhyloParam
((phylo ^. phylo_param) ^. phyloParam_version)
((phylo ^. phylo_param) ^. phyloParam_software)
config)
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
......@@ -464,7 +476,7 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "\n" <> "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
......@@ -484,6 +496,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s -> s
WeightedLogSim s -> s
Hamming -> undefined
----------------
......
......@@ -42,7 +42,7 @@ mergeGroups coocs id mapIds childs =
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
[]
(mergeAncestors $ concat $ map _phylo_groupAncestors childs)
where
--------------------
bId :: [Int]
......@@ -50,6 +50,9 @@ mergeGroups coocs id mapIds childs =
--------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
--------------------
mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
......@@ -132,8 +135,12 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges sens edges =
case prox of
WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' sens nbDocs diago
((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
......@@ -169,6 +176,17 @@ adjustClustering sync branches = case sync of
ByProximityDistribution _ _ -> branches
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
in map (\g ->
let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors
-- 2) level up the ancestors ids and filter the ones that will be merged
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
in g & phylo_groupAncestors .~ ancestors'
) groups
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
......@@ -182,7 +200,7 @@ synchronicClustering phylo =
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ concat newBranches'
in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String
......
This diff is collapsed.
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Display
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,(++),sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To transform a flat Phyloview into a nested Phyloview
toNestedView :: [PhyloNode] -> [PhyloNode] -> [PhyloNode]
toNestedView ns ns'
| null ns' = ns
| otherwise = toNestedView (filter (\n -> lvl' == getNodeLevel n) nested)
(filter (\n -> lvl' < getNodeLevel n) nested)
where
--------------------------------------
lvl' :: Level
lvl' = getNodeLevel $ head' "toNestedView" nested
--------------------------------------
nested :: [PhyloNode]
nested = foldl (\ns'' n -> let nIds' = getNodeParentsId n
in map (\n' -> if elem (getNodeId n') nIds'
then n' & pn_childs %~ (++ [n])
else n') ns'') ns' ns
--------------------------------------
-- | To process a DisplayMode to a PhyloView
processDisplay :: DisplayMode -> ExportMode -> PhyloView -> PhyloView
processDisplay d e v = case e of
Json -> case d of
Flat -> v
Nested -> let ns = sortOn getNodeLevel $ v ^. pv_nodes
lvl = getNodeLevel $ head' "processDisplay" ns
in v & pv_nodes .~ toNestedView (filter (\n -> lvl == getNodeLevel n) ns)
(filter (\n -> lvl < getNodeLevel n) ns)
_ -> v
This diff is collapsed.
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Filters
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (notElem,null,nub,(\\),intersect)
import Data.Maybe (isNothing)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To clean a PhyloView list of Nodes, Edges, etc after having filtered its Branches
cleanNodesEdges :: PhyloView -> PhyloView -> PhyloView
cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n) nIds))
& pv_nodes %~ (map (\n -> if isNothing (n ^. pn_parents)
then n
else if (not .null) $ (getNodeParentsId n) `intersect` nIds
then n & pn_parents .~ Nothing
else n ))
& pv_edges %~ (filter (\e -> (not $ elem (getSourceId e) nIds)
&& (not $ elem (getTargetId e) nIds)))
where
--------------------------------------
nIds :: [PhyloGroupId]
nIds = map getNodeId
$ filter (\n -> elem (getNodeBranchId n) bIds)
$ getNodesInBranches v
--------------------------------------
bIds :: [PhyloBranchId]
bIds = (getViewBranchIds v) \\ (getViewBranchIds v')
--------------------------------------
-- | To filter all the LonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterLonelyBranch inf sup min' prds v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> let
ns = filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v
prds' = nub $ map (\n -> (fst . fst) $ getNodeId n) ns
in not (isLone ns prds')))
--------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= min')
&& notElem (head' "filterLonelyBranch1" prds') (take inf prds)
&& notElem (head' "filterLonelyBranch2" prds') (take sup $ reverse prds)
--------------------------------------
-- | To filter all the branches with a minimal size in a PhyloView
filterSizeBranch :: Int -> PhyloView -> PhyloView
filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of
LonelyBranch (LBParams inf sup min') -> filterLonelyBranch inf sup min' (getPhyloPeriods p) v'
SizeBranch (SBParams min') -> filterSizeBranch min' v'
-- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found"
) v fs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Metrics
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (last,groupBy,sortOn)
import Data.Map (insert)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To add a new meta Metric to a PhyloBranch
addBranchMetrics :: PhyloBranchId -> Text -> Double -> PhyloView -> PhyloView
addBranchMetrics id lbl val v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_metrics %~ insert lbl [val]
else b) v
branchGroups :: PhyloView -> PhyloView
branchGroups v = foldl (\v' (bId,nb) -> addBranchMetrics bId "nbGroups" nb v') v
$ map (\(bId,ns) -> (bId,fromIntegral $ length ns))
$ getNodesByBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchAge :: PhyloView -> PhyloView
branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "age" ((abs . fromIntegral) $ ((snd . last) prds) - (fst $ head' "branchAge" prds)) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
BranchGroups -> branchGroups v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Sort
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To sort a PhyloView by Age
sortBranchByAge :: Order -> PhyloView -> PhyloView
sortBranchByAge o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "age") xs
Desc -> reverse $ sortOn (getBranchMeta "age") xs
--------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of
Nothing -> v
Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Taggers
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), union, (\\))
import Data.Text (Text)
import Data.Vector (Vector)
import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.Metrics
import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
mostFreqNgrams :: Int -> [PhyloGroup] -> [Int]
mostFreqNgrams thr groups = map fst
$ take thr
$ reverse
$ sortOn snd
$ map (\g -> (head' "mostFreqNgrams" g,length g))
$ groupBy (==)
$ (sort . concat)
$ map getGroupNgrams groups
-- | To transform the nth most frequent Ngrams into a label
freqToLabel :: Int -> Vector Ngrams -> [PhyloGroup] -> Text
freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s])
$ take nth
$ reverse $ sortOn snd
$ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch
alterBranchPeak :: (PhyloBranchId,Text) -> PhyloView -> PhyloView
alterBranchPeak (id,lbl) v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_peak .~ lbl
else b) v
-- | To set the peak of a PhyloBranch as the nth most frequent terms of its PhyloNodes
branchPeakFreq :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, freqToLabel thr (getFoundationsRoots p)
$ getGroupsFromNodes ns p))
$ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
BranchPeakCooc -> branchPeakCooc v' 2 p
BranchPeakInc -> branchPeakInc v' 2 p
GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.ViewMaker
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,(++),sort)
import Data.Text (Text)
import Data.Map (Map, empty, elems, unionWithKey, fromList)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.View.Display
import Gargantext.Core.Viz.Phylo.View.Filters
import Gargantext.Core.Viz.Phylo.View.Metrics
import Gargantext.Core.Viz.Phylo.View.Sort
import Gargantext.Core.Viz.Phylo.View.Taggers
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | To init a PhyloBranch
initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch
initPhyloBranch id lbl = PhyloBranch id lbl empty
-- | To init a PhyloEdge
initPhyloEdge :: PhyloGroupId -> [Pointer] -> EdgeType -> [PhyloEdge]
initPhyloEdge id pts et = map (\pt -> PhyloEdge id (fst pt) et (snd pt)) pts
-- | To init a PhyloView
initPhyloView :: Level -> Text -> Text -> Filiation -> Bool -> Phylo -> PhyloView
initPhyloView lvl lbl dsc fl vb p = PhyloView (getPhyloParams p) lbl dsc fl lvl
(getPhyloPeriods p)
empty
([] ++ (phyloToBranches lvl p))
([] ++ (groupsToNodes True vb (getFoundationsRoots p) gs))
([] ++ (groupsToEdges fl PeriodEdge gs))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloNodes
groupsToNodes :: Bool -> Bool -> Vector Ngrams -> [PhyloGroup] -> [PhyloNode]
groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
in PhyloNode
(getGroupId g)
(getGroupBranchId g)
"" idxs
(if isV
then Just (ngramsToText ns idxs)
else Nothing)
(g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR)
then Just (getGroupLevelParentsId g)
else Nothing)
[]
) gs
-- | To merge edges by keeping the maximum weight
mergeEdges :: [PhyloEdge] -> [PhyloEdge] -> [PhyloEdge]
mergeEdges lAsc lDes = elems
$ unionWithKey (\_k vAsc vDes -> vDes & pe_weight .~ (max (vAsc ^. pe_weight) (vDes ^. pe_weight))) mAsc mDes
where
--------------------------------------
mAsc :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mAsc = fromList
$ map (\(k,e) -> (k, e & pe_source .~ fst k
& pe_target .~ snd k))
$ zip (map (\e -> (e ^. pe_target,e ^. pe_source)) lAsc) lAsc
--------------------------------------
mDes :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mDes = fromList
$ zip (map (\e -> (e ^. pe_source,e ^. pe_target)) lDes) lDes
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloEdges
groupsToEdges :: Filiation -> EdgeType -> [PhyloGroup] -> [PhyloEdge]
groupsToEdges fl et gs = case fl of
Complete -> (groupsToEdges Ascendant et gs) ++ (groupsToEdges Descendant et gs)
Merge -> mergeEdges (groupsToEdges Ascendant et gs) (groupsToEdges Descendant et gs)
_ -> concat
$ map (\g -> case fl of
Ascendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodParents g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelParents g) et
Descendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodChilds g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelChilds g) et
_Type -> panic "[ERR][Viz.Phylo.View.ViewMaker.groupsToEdges] not implemented"
) gs
-- | To transform a Phylo into a list of PhyloBranch for a given Level
phyloToBranches :: Level -> Phylo -> [PhyloBranch]
phyloToBranches lvl p = map (\id -> initPhyloBranch id "") $ nub $ getBranchIdsWith lvl p
-- | To add recursively a list of PhyloNodes and Edges to PhyloView from a given Level and Depth
addChildNodes :: Bool -> Level -> Level -> Bool -> Filiation -> Phylo -> PhyloView -> PhyloView
addChildNodes shouldDo lvl lvlMin vb fl p v =
if (not shouldDo) || (lvl == lvlMin)
then v
else addChildNodes shouldDo (lvl - 1) lvlMin vb fl p
$ v & pv_branches %~ (++ (phyloToBranches (lvl - 1) p))
& pv_nodes %~ (++ (groupsToNodes False vb (getFoundationsRoots p) gs'))
& pv_edges %~ (++ (groupsToEdges fl PeriodEdge gs'))
& pv_edges %~ (++ (groupsToEdges Descendant LevelEdge gs ))
& pv_edges %~ (++ (groupsToEdges Ascendant LevelEdge gs'))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel (lvl - 1) p
--------------------------------------
-- | To transform a PhyloQuery into a PhyloView
toPhyloView :: PhyloQueryView -> Phylo -> PhyloView
toPhyloView q p = traceView
$ processDisplay (q ^. qv_display) (q ^. qv_export)
$ processSort (q ^. qv_sort ) p
$ processTaggers (q ^. qv_taggers) p
$ processDynamics
$ processFilters (q ^. qv_filters) p
$ processMetrics (q ^. qv_metrics) p
$ addChildNodes (q ^. qv_levelChilds) (q ^. qv_lvl) (q ^. qv_levelChildsDepth) (q ^. qv_verbose) (q ^. qv_filiation) p
$ initPhyloView (q ^. qv_lvl) (getPhyloTitle p) (getPhyloDescription p) (q ^. qv_filiation) (q ^. qv_verbose) p
-----------------
-- | Taggers | --
-----------------
traceView :: PhyloView -> PhyloView
traceView pv = trace ("------------\n--| View |--\n------------\n\n"
<> "view level : " <> show (pv ^. pv_level) <> "\n"
<> show (length $ pv ^. pv_branches) <> " exported branches with " <> show (length $ pv ^. pv_nodes) <> " groups\n"
<> "groups by branches : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") pv
where
lst = sort $ map (fromIntegral . length . snd) $ getNodesByBranches pv
......@@ -23,7 +23,7 @@ module Gargantext.Database.Admin.Types.Hyperdata.Phylo
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Core.Viz.Phylo (Phylo(..))
import Gargantext.Core.Viz.LegacyPhylo (Phylo(..))
------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment