Commit faad42fa authored by qlobbe's avatar qlobbe

add the max clique

parent 37af3986
Pipeline #848 failed with stage
...@@ -33,7 +33,6 @@ import Data.Aeson.TH (deriveJSON) ...@@ -33,7 +33,6 @@ import Data.Aeson.TH (deriveJSON)
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -348,7 +347,7 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) ...@@ -348,7 +347,7 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
type Support = Int type Support = Int
data PhyloClique = PhyloClique data PhyloClique = PhyloClique
{ _phyloClique_nodes :: Set Ngrams { _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support , _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date) , _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
......
...@@ -108,7 +108,7 @@ config = ...@@ -108,7 +108,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2 , phyloLevel = 2
, exportFilter = [ByBranchSize 0] , exportFilter = [ByBranchSize 0]
, clique = Fis 0 0 } , clique = MaxClique 0 }
docs :: [Document] docs :: [Document]
......
...@@ -17,7 +17,6 @@ module Gargantext.Viz.Phylo.PhyloMaker where ...@@ -17,7 +17,6 @@ module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (size)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -27,6 +26,7 @@ import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, consta ...@@ -27,6 +26,7 @@ import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, consta
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Graph.MaxClique (getMaxCliques)
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
...@@ -91,7 +91,7 @@ toGroupsProxi lvl phylo = ...@@ -91,7 +91,7 @@ toGroupsProxi lvl phylo =
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi) in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -104,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -104,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo) , f obj pId lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -112,13 +112,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -112,13 +112,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId lvl idx fdt coocs = cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx ""
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams (fis ^. phyloClique_nodes)
(ngramsToCooc ngrams coocs) (ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- | branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
...@@ -161,46 +159,57 @@ filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= th ...@@ -161,46 +159,57 @@ filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= th
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. phyloClique_nodes) (f ^. phyloClique_nodes)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem let fMax = filter (\f' -> not $ isNested (f ^. phyloClique_nodes) (f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
clq' = clq `using` parList rdeepseq clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique] toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
-- $ traceFis "Filtered by clique size" -- $ traceFis "Filtered by clique size"
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support" -- $ traceFis "Filtered by support"
$ filterClique True s (filterCliqueBySupport) $ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis" -- $ traceFis "Unfiltered Fis"
phyloClique phyloClique
MaxClique _ -> undefined MaxClique s -> filterClique True s (filterCliqueBySize)
phyloClique
where where
-------------------------------------- --------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = case (clique $ getConfig phylo) of phyloClique = case (clique $ getConfig phylo) of
Fis _ _ -> let fis = map (\(prd,docs) -> Fis _ _ ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs) let fis = map (\(prd,docs) ->
in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst)) let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd) lst))
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
MaxClique _ -> undefined MaxClique _ ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques 0 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
-------------------------------------- --------------------------------------
-- dev viz graph maxClique getMaxClique -- dev viz graph maxClique getMaxClique
......
...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.PhyloTools where ...@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy)
import Data.Set (Set, size, disjoint) import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String) import Data.String (String)
import Data.Text (Text, unwords) import Data.Text (Text, unwords)
...@@ -178,7 +178,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (> ...@@ -178,7 +178,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
where where
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
...@@ -229,6 +229,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst) ...@@ -229,6 +229,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)] listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ] listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment