Commit b88a8088 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev-merge

parents 8f864992 6fc1eb02
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE Strict #-}
module Main where
import Data.Aeson
import Data.ByteString.Lazy (ByteString)
import Data.Maybe (isJust, fromJust)
import Data.List (concat, nub, isSuffixOf, take)
import Data.String (String)
import Data.Text (Text, unwords)
import Gargantext.Prelude
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Text.Context (TermList)
import Gargantext.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloMaker (toPhylo)
import GHC.IO (FilePath)
import Prelude (Either(..))
import System.Environment
import System.Directory (listDirectory)
import Control.Concurrent.Async (mapConcurrently)
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
---------------
-- | Tools | --
---------------
-- | To print an important message as an IO()
printIOMsg :: String -> IO ()
printIOMsg msg =
putStrLn ( "\n"
<> "------------"
<> "\n"
<> "-- | " <> msg <> "\n" )
-- | To print a comment as an IO()
printIOComment :: String -> IO ()
printIOComment cmt =
putStrLn ( "\n" <> cmt <> "\n" )
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson :: FilePath -> IO ByteString
readJson path = Lazy.readFile path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
--------------------------------------
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
wosToCorpus limit path = do
files <- getFilesFromPath path
take limit
<$> map (\d -> let date' = fromJust $ _hyperdataDocument_publication_year d
title = fromJust $ _hyperdataDocument_title d
abstr = if (isJust $ _hyperdataDocument_abstract d)
then fromJust $ _hyperdataDocument_abstract d
else ""
in (date', title <> " " <> abstr))
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hyperdataDocument_publication_year d)
&& (isJust $ _hyperdataDocument_title d))
<$> parseFile WOS (path <> file) ) files
-- | To transform a Csv file into a readable corpus
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit path = Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> (csv_publication_year row, (csv_title row) <> " " <> (csv_abstract row)))
<$> snd <$> Csv.readFile path
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser path = case parser of
Wos limit -> wosToCorpus limit path
Csv limit -> csvToCorpus limit path
-- | To parse a file into a list of Document
fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document]
fileToDocs parser path lst = do
corpus <- fileToCorpus parser path
let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
--------------
-- | Main | --
--------------
main :: IO ()
main = do
printIOMsg "Starting the reconstruction"
printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either String Config)
case jsonArgs of
Left err -> putStrLn err
Right config -> do
printIOMsg "Parse the corpus"
mapList <- csvGraphTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo"
let phylo = toPhylo corpus mapList config
printIOMsg "End of reconstruction"
\ No newline at end of file
......@@ -216,7 +216,7 @@ main = do
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis'
......
......@@ -68,6 +68,8 @@ library:
- Gargantext.Viz.Graph.Distances.Matrice
- Gargantext.Viz.Graph.Index
- Gargantext.Viz.Phylo
- Gargantext.Viz.AdaptativePhylo
- Gargantext.Viz.Phylo.PhyloMaker
- Gargantext.Viz.Phylo.Tools
- Gargantext.Viz.Phylo.Example
- Gargantext.Viz.Phylo.LevelMaker
......@@ -278,6 +280,31 @@ executables:
- split
- unordered-containers
gargantext-adaptative-phylo:
main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- aeson
- async
- base
- bytestring
- containers
- directory
- gargantext
- vector
- parallel
- cassava
- ini
- optparse-generic
- split
- unordered-containers
gargantext-import:
main: Main.hs
source-dirs: bin/gargantext-import
......
{-|
Module : Gargantext.Viz.AdaptativePhylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scale of
granularity of group of ngrams (terms and multi-terms).
The main type is Phylo which is synonym of Phylomemy (only difference is
the number of chars).
References:
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.AdaptativePhylo where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Text (Text, pack)
import Data.Vector (Vector)
import Data.Map (Map)
import Data.Set (Set)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Gargantext.Text.Context (TermList)
import GHC.Generics
import GHC.IO (FilePath)
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
----------------
-- | Config | --
----------------
data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq)
data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double }
| Hamming
deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data ContextualUnit =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
deriving (Show,Generic,Eq)
data Config =
Config { corpusPath :: FilePath
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, phyloName :: Text
, phyloLevel :: Int
, phyloProximity :: Proximity
, timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit
, branchSize :: Int
} deriving (Show,Generic,Eq)
defaultConfig :: Config
defaultConfig =
Config { corpusPath = ""
, listPath = ""
, outputPath = ""
, corpusParser = Csv 1000
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 0 0.05
, timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4
, branchSize = 3
}
instance FromJSON Config
instance ToJSON Config
instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
-- | Software parameters
data Software =
Software { _software_name :: Text
, _software_version :: Text
} deriving (Generic, Show, Eq)
defaultSoftware :: Software
defaultSoftware =
Software { _software_name = pack "Gargantext"
, _software_version = pack "v4" }
-- | Global parameters of a Phylo
data PhyloParam =
PhyloParam { _phyloParam_version :: Text
, _phyloParam_software :: Software
, _phyloParam_config :: Config
} deriving (Generic, Show, Eq)
defaultPhyloParam :: PhyloParam
defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative"
, _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig }
------------------
-- | Document | --
------------------
-- | Date : a simple Integer
type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
-- | Document : a piece of Text linked to a Date
data Document = Document
{ date :: Date
, text :: [Ngrams]
} deriving (Eq,Show,Generic,NFData)
--------------------
-- | Foundation | --
--------------------
-- | The Foundations of a Phylo created from a given TermList
data PhyloFoundations = PhyloFoundations
{ _foundations_roots :: !(Vector Ngrams)
, _foundations_mapList :: TermList
} deriving (Generic, Show, Eq)
---------------------------
-- | Coocurency Matrix | --
---------------------------
-- | Cooc : a coocurency matrix between two ngrams
type Cooc = Map (Int,Int) Double
-------------------
-- | Phylomemy | --
-------------------
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
-- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
-- param : the parameters of the phylomemy (with the user's configuration)
-- periods : the temporal steps of a phylomemy
data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod
}
deriving (Generic, Show, Eq)
-- | PhyloPeriodId : the id of a given period
type PhyloPeriodId = (Date,Date)
-- | PhyloPeriod : steps of a phylomemy on a temporal axis
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq)
-- | Level : a level of clustering
type Level = Int
-- | PhyloLevelId : the id of a level of clustering in a given period
type PhyloLevelId = (PhyloPeriodId,Level)
-- | PhyloLevel : levels of phylomemy on a synchronic axis
-- Levels description:
-- Level 0: The foundations and the base of the phylo
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel =
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq)
type PhyloGroupId = (PhyloLevelId, Int)
-- | BranchId : (a level, a sequence of branch index)
-- the sequence is a path of heritage from the most to the less specific branch
type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int
, _phylo_groupSupport :: Support
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupGhostPointers :: [Pointer]
}
deriving (Generic, Show, Eq)
-- | Weight : A generic mesure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
type Link = ((PhyloGroupId, PhyloGroupId), Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
---------------------------
-- | Frequent Item Set | --
---------------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq)
----------------
-- | Lenses | --
----------------
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''ContextualUnit
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloFis
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup
makeLenses ''PhyloParam
------------------------
-- | JSON instances | --
------------------------
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
\ No newline at end of file
......@@ -80,8 +80,8 @@ data Phylo =
, _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_fis :: Map (Date,Date) [PhyloFis]
, _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
, _phylo_fis :: !(Map (Date,Date) [PhyloFis])
, _phylo_param :: PhyloParam
}
deriving (Generic, Show, Eq)
......@@ -156,7 +156,7 @@ data PhyloGroup =
, _phylo_groupNgramsMeta :: Map Text [Double]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupCooc :: !(Map (Int,Int) Double)
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
......@@ -309,7 +309,7 @@ data SBParams = SBParams
-- | Metric constructors
data Metric = BranchAge | BranchBirth deriving (Generic, Show, Eq, Read)
data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
----------------
......
......@@ -83,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge,BranchBirth] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge,BranchBirth,BranchGroups] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
--------------------------------------------------
......
......@@ -22,9 +22,10 @@ module Gargantext.Viz.Phylo.LevelMaker
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, zip, last, null)
import Data.Map (Map, (!), empty, singleton)
import Data.Map (Map, (!), empty, singleton, size)
import Data.Text (Text)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics
......@@ -83,7 +84,7 @@ instance PhyloLevelMaker PhyloFis
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis p) $ zip [1..] l
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
......@@ -111,7 +112,7 @@ addPhyloLevel' lvl m p = alterPhyloPeriods
in over (phylo_periodLevels)
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in phyloLevels ++ [PhyloLevel (pId, lvl) groups]
in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
......@@ -121,8 +122,8 @@ addPhyloLevel' lvl m p = alterPhyloPeriods
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
(singleton "support" (fromIntegral $ getSupport fis))
......@@ -132,10 +133,10 @@ cliqueToGroup prd lvl idx lbl fis p = PhyloGroup ((prd, lvl), idx) lbl ngrams
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInRoots x p)
ngrams = sort $ map (\x -> getIdxInRoots' x root)
$ Set.toList
$ getClique fis
--------------------------------------
......@@ -210,7 +211,11 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
$ transposePeriodLinks (lvl + 1)
-- $ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
......@@ -238,7 +243,8 @@ toPhylo1 clus prox d p = case clus of
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
-- $ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis) phyloFis
$ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where
--------------------------------------
phyloFis :: Phylo
......@@ -279,6 +285,9 @@ tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Filiation -> Phylo -> Phylo
traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
......
......@@ -19,9 +19,9 @@ module Gargantext.Viz.Phylo.LinkMaker
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, groupBy, union, inits, scanl, find)
import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, delete, intersect, elemIndex, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra
import Data.Map (Map,(!),fromListWith,elems,restrictKeys,unionWith,member)
import Data.Map (Map, (!), fromListWith, elems, restrictKeys, filterWithKey, keys, unionWith, unions, intersectionWith, member, fromList)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
......@@ -222,6 +222,62 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l
groupsToMaps :: Ord b => (PhyloGroup -> b) -> [PhyloGroup] -> [Map PhyloGroupId PhyloGroup]
groupsToMaps f gs = map (\gs' -> fromList $ listToTuple getGroupId gs')
$ groupBy ((==) `on` f)
$ sortOn f gs
phyloToPeriodMaps :: Level -> Filiation -> Phylo -> [Map PhyloGroupId PhyloGroup]
phyloToPeriodMaps lvl fil p =
let prdMap = groupsToMaps (fst . getGroupPeriod) (getGroupsWithLevel lvl p)
in case fil of
Ascendant -> reverse prdMap
Descendant -> prdMap
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.phyloToPeriodMaps] Wrong type of filiation")
trackPointersRec :: Filiation -> Map PhyloGroupId PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
trackPointersRec fil m gs res =
if (null gs) then res
else if (Map.null m) then res ++ gs
else
let g = head' "track" gs
pts = Map.fromList $ getGroupPointers PeriodEdge fil g
pts' = Map.toList $ fromListWith (\w w' -> max w w') $ concat $ elems
$ intersectionWith (\w g' -> map (\(id,_w') -> (id, w))
$ getGroupPointers LevelEdge Ascendant g') pts m
res' = res ++ [case fil of
Ascendant -> g & phylo_groupPeriodParents .~ pts'
Descendant -> g & phylo_groupPeriodChilds .~ pts'
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.transposeLinks] Wrong type of filiation")]
in trackPointersRec fil (filterWithKey (\k _ -> not $ elem k (keys pts)) m) (tail' "track" gs) res'
transposeLinks :: Level -> Filiation -> Phylo -> Phylo
transposeLinks lvl fil p =
let prdMap = zip (phyloToPeriodMaps (lvl - 1) fil p) (phyloToPeriodMaps lvl fil p)
transposed = map (\(gs,gs') ->
let idx = fromJust $ elemIndex (gs,gs') prdMap
next = take (getPhyloMatchingFrame p) $ snd $ splitAt (idx + 1) prdMap
groups = trackPointersRec fil (unions $ map fst next) (elems gs') []
in (getGroupPeriod $ head' "transpose" groups ,groups)
) prdMap
transposed' = Map.fromList $ (transposed `using` parList rdeepseq)
in alterPhyloGroups
(\gs -> if ((not . null) gs) && (lvl == (getGroupLevel $ head' "transpose" gs))
then transposed' ! (getGroupPeriod $ head' "transpose" gs)
else gs
) p
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups
......
{-|
Module : Gargantext.Viz.Phylo.PhyloExample
Description : Phylomemy example based on history of Cleopatre.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- | Cesar et Cleôpatre
-- | Exemple de phylomemie
-- | French without accents
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.PhyloExample where
import Data.List (sortOn, nub, sort)
import Data.Map (Map)
import Data.Text (Text, toLower)
import Gargantext.Prelude
import Gargantext.Text.Context (TermList)
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Viz.Phylo.PhyloMaker
import Control.Lens
import qualified Data.Vector as Vector
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1 :: Phylo
phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
---------------------------------------------
-- | STEP 2 | -- Build the frequent items set
---------------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document]
docsByPeriods = groupDocsByPeriod date periods docs
--------------------------------------------
-- | STEP 1 | -- Init the Base of the Phylo
--------------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc
phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots)
periods :: [(Date,Date)]
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double
nbDocsByYear = docsToTimeScaleNb docs
config :: Config
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, branchSize = 0
, contextualUnit = Fis 0 0 }
docs :: [Document]
docs = map (\(d,t)
-> Document d ( filter (\n -> isRoots n (foundations ^. foundations_roots))
$ monoTexts t)) corpus
foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
mapList :: TermList
mapList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
, "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
corpus :: [(Date, Text)]
corpus = sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.PhyloExport
Description : Exportation module of a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.PhyloExport where
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.PhyloMaker
Description : Maker engine for rebuilding a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++))
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), filterWithKey, restrictKeys)
import Data.Set (size)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.Set as Set
------------------
-- | To Phylo | --
------------------
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
--------------------------------------
--------------------
-- | To Phylo 1 | --
--------------------
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
phyloFis = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloFis)
else
phyloLvl )
phylo
fisToGroup :: PhyloFis -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
fisToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloFis_clique) fdt
in PhyloGroup pId lvl idx
(fis ^. phyloFis_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[])
[] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase)
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
--------------------------------------
---------------------------
-- | Frequent Item Set | --
---------------------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> (fis ^. phyloFis_support) >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ fis ^. phyloFis_clique) >= thr) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloFis_clique) (Set.toList $ f ^. phyloFis_clique)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloFis_clique) (Set.toList $ f' ^. phyloFis_clique)) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis :: Map (Date, Date) [Document] -> Int -> Int -> Map (Date,Date) [PhyloFis]
toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$ filterFisByNested
$ traceFis "Filtered by clique size"
$ filterFis True clique (filterFisByClique)
$ traceFis "Filtered by support"
$ filterFis True support (filterFisBySupport)
$ traceFis "Unfiltered Fis" phyloFis
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis =
let fis = map (\(prd,docs) -> let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloFis (fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
--------------------------------------
--------------------
-- | Coocurency | --
--------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
-- | To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList
$ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc'
-----------------------
-- | to Phylo Base | --
-----------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs'
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations
(docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
{-|
Module : Gargantext.Viz.Phylo.PhyloTools
Description : Module dedicated to all the tools needed for making a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub)
import Data.Set (Set, size)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey)
import Data.String (String)
import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.List as List
import qualified Data.Set as Set
--------------
-- | Misc | --
--------------
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
Just i -> i
---------------------
-- | Foundations | --
---------------------
-- | Is this Ngrams a Foundations Root ?
isRoots :: Ngrams -> Vector Ngrams -> Bool
isRoots n ns = Vector.elem n ns
-- | To transform a list of nrams into a list of foundation's index
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
--------------
-- | Time | --
--------------
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
findBounds :: [Date] -> (Date,Date)
findBounds dates =
let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates')
toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
toPeriods dates p s =
let (start,end) = findBounds dates
in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
$ chunkAlong p s [start .. end]
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step =
let (start,end) = findBounds dates
in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
-------------
-- | Fis | --
-------------
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l
else f thr l
traceClique :: Map (Date, Date) [PhyloFis] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloFis_clique) $ concat $ elems mFis
--------------------------------------
traceSupport :: Map (Date, Date) [PhyloFis] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems mFis
--------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n"
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of
Fis s _ -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of
Fis _ s -> s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
--------------
-- | Cooc | --
--------------
listToCombi' :: [a] -> [(a,a)]
listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
listToEqual' :: Eq a => [a] -> [(a,a)]
listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
listToKeys :: Eq a => [a] -> [(a,a)]
listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
--------------------
-- | PhyloGroup | --
--------------------
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId group = ((group ^. phylo_groupPeriod, group ^. phylo_groupLevel), group ^. phylo_groupIndex)
---------------
-- | Phylo | --
---------------
addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
addPointers group fil pty pointers =
case pty of
TemporalPointer -> case fil of
ToChilds -> group & phylo_groupPeriodChilds %~ (++ pointers)
ToParents -> group & phylo_groupPeriodParents %~ (++ pointers)
LevelPointer -> case fil of
ToChilds -> group & phylo_groupLevelChilds %~ (++ pointers)
ToParents -> group & phylo_groupLevelParents %~ (++ pointers)
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst
$ keys
$ phylo ^. phylo_periods
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo =
over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups
. traverse
) (\group ->
let id = getGroupId group
in
if member id m
then m ! id
else group ) phylo
------------------
-- | Pointers | --
------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
-------------------
-- | Proximity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
----------------
-- | Branch | --
----------------
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
\ No newline at end of file
{-|
Module : Gargantext.Viz.Phylo.SynchronicClustering
Description : Module dedicated to the adaptative synchronic clustering of a Phylo.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude
-- import Gargantext.Viz.AdaptativePhylo
-- import Gargantext.Viz.Phylo.PhyloTools
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
--------------------
-- | Clustering | --
--------------------
relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
\ No newline at end of file
This diff is collapsed.
......@@ -241,6 +241,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInRoots' :: Ngrams -> Vector Ngrams -> Int
getIdxInRoots' n root = case (elemIndex n root) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInVector] Ngrams not in foundationsRoots: " <> cs n
......
......@@ -124,7 +124,10 @@ toDotLabel lbl = StrLabel $ fromStrict lbl
setPeakDotNode :: PhyloBranch -> Dot DotId
setPeakDotNode pb = node (toBranchDotId $ pb ^. pb_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ pb ^. pb_peak)]
<> (setAttrFromMetrics $ pb ^. pb_metrics))
<> (setAttrFromMetrics $ pb ^. pb_metrics)
<> [ setAttr "nodeType" "peak"
, setAttr "branchId" ((pack $ show (fst $ getBranchId pb)) <> (pack $ show (snd $ getBranchId pb)))
])
-- | To set a Peak Edge
......@@ -134,9 +137,9 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor PaleGreen)
| d == 1 = H.BGColor (toColor SkyBlue)
| d == 2 = H.BGColor (toColor LightPink)
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
......@@ -186,14 +189,19 @@ setHtmlTable pn = H.Table H.HTable
-- | To set a Node
setDotNode :: PhyloNode -> Dot DotId
setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
([FontName "Arial", Shape Square, toLabel (setHtmlTable pn)])
([FontName "Arial", Shape Square, toLabel (setHtmlTable pn)]
<> [ setAttr "nodeType" "group"
, setAttr "from" (pack $ show (fst $ getNodePeriod pn))
, setAttr "to" (pack $ show (fst $ getNodePeriod pn))
, setAttr "branchId" ((pack $ show (fst $ getNodeBranchId pn)) <> (pack $ show (snd $ getNodeBranchId pn)))
])
-- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black], Constraint True]
-- | To set a Period Edge
......@@ -213,8 +221,9 @@ viewToDot pv = digraph ((Str . fromStrict) $ pv ^. pv_title)
<> [setAttr "description" $ fromStrict $ pv ^. pv_description]
<> [setAttr "filiation" $ (pack . show) $ pv ^. pv_filiation]
<> (setAttrFromMetrics $ pv ^. pv_metrics)
<> [FontSize 30, LabelLoc VTop, Splines SplineEdges, Overlap ScaleOverlaps,
Ratio AutoRatio, Style [SItem Filled []],Color [toWColor White]])
<> [FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]])
-- set the peaks
......@@ -235,7 +244,10 @@ viewToDot pv = digraph ((Str . fromStrict) $ pv ^. pv_title)
-- set the period label
node (toPeriodDotId prd) [Shape Square, FontSize 50, Label (toPeriodDotLabel prd)]
node (toPeriodDotId prd) ([Shape Square, FontSize 50, Label (toPeriodDotLabel prd)]
<> [setAttr "nodeType" "period",
setAttr "from" (fromStrict $ T.pack $ (show $ fst prd)),
setAttr "to" (fromStrict $ T.pack $ (show $ snd prd))])
mapM setDotNode $ filterNodesByPeriod prd $ filterNodesByLevel (pv ^. pv_level) (pv ^.pv_nodes)
......
......@@ -36,6 +36,12 @@ addBranchMetrics id lbl val v = over (pv_branches
else b) v
branchGroups :: PhyloView -> PhyloView
branchGroups v = foldl (\v' (bId,nb) -> addBranchMetrics bId "nbGroups" nb v') v
$ map (\(bId,ns) -> (bId,fromIntegral $ length ns))
$ getNodesByBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchAge :: PhyloView -> PhyloView
branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
......@@ -63,6 +69,7 @@ processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
BranchGroups -> branchGroups v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment