Commit faad42fa authored by qlobbe's avatar qlobbe

add the max clique

parent 37af3986
......@@ -33,7 +33,6 @@ import Data.Aeson.TH (deriveJSON)
import Data.Text (Text, pack)
import Data.Vector (Vector)
import Data.Map (Map)
import Data.Set (Set)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
......@@ -348,7 +347,7 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
type Support = Int
data PhyloClique = PhyloClique
{ _phyloClique_nodes :: Set Ngrams
{ _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
} deriving (Generic,NFData,Show,Eq)
......
......@@ -108,7 +108,7 @@ config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, clique = Fis 0 0 }
, clique = MaxClique 0 }
docs :: [Document]
......
......@@ -17,7 +17,6 @@ module Gargantext.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (size)
import Data.Vector (Vector)
import Gargantext.Prelude
......@@ -27,6 +26,7 @@ import Gargantext.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, consta
import Gargantext.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Text.Context (TermList)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Graph.MaxClique (getMaxCliques)
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
......@@ -91,7 +91,7 @@ toGroupsProxi lvl phylo =
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -104,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) (getRoots phylo)
, f obj pId lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
......@@ -112,13 +112,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> Vector Ngrams -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId lvl idx fdt coocs =
let ngrams = ngramsToIdx (Set.toList $ fis ^. phyloClique_nodes) fdt
in PhyloGroup pId lvl idx ""
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
(fis ^. phyloClique_support)
ngrams
(ngramsToCooc ngrams coocs)
(fis ^. phyloClique_nodes)
(ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] []
......@@ -161,46 +159,57 @@ filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= th
-- | To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ f' ^. phyloClique_nodes) (Set.toList $ f ^. phyloClique_nodes)) mem)
foldl (\mem f -> if (any (\f' -> isNested (f' ^. phyloClique_nodes) (f ^. phyloClique_nodes)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ f ^. phyloClique_nodes) (Set.toList $ f' ^. phyloClique_nodes)) mem
let fMax = filter (\f' -> not $ isNested (f ^. phyloClique_nodes) (f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l)
$ elems m
clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters
-- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
$ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support"
$ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
phyloClique
MaxClique _ -> undefined
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
$ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support"
$ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
phyloClique
MaxClique s -> filterClique True s (filterCliqueBySize)
phyloClique
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = case (clique $ getConfig phylo) of
Fis _ _ -> let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in (prd, map (\f -> PhyloClique (fst f) (snd f) prd) lst))
Fis _ _ ->
let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd) lst))
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ -> undefined
MaxClique _ ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques 0 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
--------------------------------------
-- dev viz graph maxClique getMaxClique
......
......@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy)
import Data.Set (Set, size, disjoint)
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty)
import Data.String (String)
import Data.Text (Text, unwords)
......@@ -178,7 +178,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . size . _phyloClique_nodes) $ concat $ elems mFis
cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
--------------------------------------
......@@ -229,6 +229,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment