Commit 0b69015c authored by Quentin Lobbé's avatar Quentin Lobbé

Add new types for Cluster, Proximity, Filter, etc

parent eac6ceb5
......@@ -30,7 +30,7 @@ one 8, e54847.
module Gargantext.Viz.Phylo where
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Aeson.TH (deriveJSON,defaultOptions)
import Data.Maybe (Maybe)
import Data.Text (Text)
import Data.Set (Set)
......@@ -155,12 +155,7 @@ type Pointer = (PhyloGroupId, Weight)
type Ngrams = Text
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
type Fis = (Clique,Support)
-- | Aggregates | --
-- | Document : a piece of Text linked to a Date
......@@ -170,7 +165,16 @@ data Document = Document
} deriving (Show)
type Cluster = [PhyloGroup]
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
type PhyloFis = (Clique,Support)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
-- | A List of PhyloGroup in a Graph
......@@ -181,66 +185,127 @@ type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
type GroupGraph = (GroupNodes,GroupEdges)
---------------
-- | Error | --
---------------
data PhyloError = LevelDoesNotExist
| LevelUnassigned
deriving (Show)
-----------------
-- | Cluster | --
-----------------
-- | Cluster constructors
data Cluster = Fis FisParams
| RelatedComponents RCParams
| Louvain LouvainParams
deriving (Show)
-- | Parameters for Fis clustering
data FisParams = FisParams
{ _fis_filtered :: Bool
, _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support
} deriving (Show)
------------------------------------------------------------------------
-- | To create a Phylo | --
-- | Parameters for RelatedComponents clustering
data RCParams = RCParams
{ _rc_proximity :: Proximity } deriving (Show)
-- | Parameters for Louvain clustering
data LouvainParams = LouvainParams
{ _louvain_proximity :: Proximity } deriving (Show)
-- | PhyloQuery | --
-------------------
-- | Proximity | --
-------------------
-- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams
| Hamming HammingParams
| Filiation
deriving (Show)
-- | Parameters for WeightedLogJaccard proximity
data WLJParams = WLJParams
{ _wlj_threshold :: Double
, _wlj_sensibility :: Double
} deriving (Show)
-- | Parameters for Hamming proximity
data HammingParams = HammingParams
{ _hamming_threshold :: Double } deriving (Show)
-- | A PhyloQuery is the structured representation of a user query to create a Phylo
----------------
-- | Filter | --
----------------
-- | Filter constructors
data Filter = LonelyBranch LBParams deriving (Show)
-- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: Int
, _lb_periodsSup :: Int
, _lb_minNodes :: Int } deriving (Show)
----------------
-- | Metric | --
----------------
-- | Metric constructors
data Metric = BranchAge deriving (Show)
----------------
-- | Tagger | --
----------------
-- | Tagger constructors
data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics deriving (Show)
--------------
-- | Sort | --
--------------
-- | Sort constructors
data Sort = ByBranchAge deriving (Show)
data Order = Asc | Desc deriving (Show)
--------------------
-- | PhyloQuery | --
--------------------
-- | A Phyloquery describes a phylomemic reconstruction
data PhyloQuery = PhyloQuery
{ _q_phyloName :: Text
, _q_phyloDescription :: Text
{ _q_phyloName :: Text
, _q_phyloDesc :: Text
-- Grain and Steps for seting up the periods
-- Grain and Steps for the PhyloPeriods
, _q_periodGrain :: Int
, _q_periodSteps :: Int
-- First clustering methods (ie: level 1)
, _q_fstCluster :: QueryClustering
-- Clustering method for making level 1 of the Phylo
, _q_cluster :: Cluster
-- Inter temporal matching method
, _q_interTemporalMatching :: QueryProximity
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
-- Level max of reconstruction of the Phylo && clustering methods to level max
-- Last level of reconstruction
, _q_nthLevel :: Level
, _q_nthCluster :: QueryClustering
-- Clustering method used from level 1 to nthLevel
, _q_nthCluster :: Cluster
} deriving (Show)
data Filiation = Ascendant | Descendant | Complete deriving (Show)
data EdgeType = PeriodEdge | LevelEdge deriving (Show)
data Filiation = Ascendant | Descendant | Complete deriving (Show)
data EdgeType = PeriodEdge | LevelEdge deriving (Show)
-- | Reconstruction treatments
data Proximity = WeightedLogJaccard | Hamming | Filiation deriving (Show)
data Clustering = Louvain | RelatedComponents | FrequentItemSet deriving (Show)
-- | A constructor for Proximities
data QueryProximity = QueryProximity
{ _qp_name :: Proximity
, _qp_pNum :: Map Text Double
, _qp_threshold :: Maybe Double } deriving (Show)
-- | A constructor for Clustering
data QueryClustering = QueryClustering
{ _qc_name :: Clustering
, _qc_pNum :: Map Text Double
, _qc_pBool :: Map Text Bool
, _qc_proximity :: Maybe QueryProximity } deriving (Show)
------------------------------------------------------------------------
-- | To export a Phylo | --
-------------------
-- | PhyloView | --
-------------------
-- | A PhyloView is the output type of a Phylo
data PhyloView = PhyloView
{ _phylo_viewParam :: PhyloParam
, _phylo_viewLabel :: Text
......@@ -252,14 +317,13 @@ data PhyloView = PhyloView
, _phylo_viewEdges :: [PhyloEdge]
} deriving (Show)
-- | A phyloview is made of PhyloBranches, edges and nodes
data PhyloBranch = PhyloBranch
{ _phylo_branchId :: PhyloBranchId
, _phylo_branchLabel :: Text
, _phylo_branchMeta :: Map Text Double
} deriving (Show)
data PhyloEdge = PhyloEdge
{ _phylo_edgeSource :: PhyloGroupId
, _phylo_edgeTarget :: PhyloGroupId
......@@ -267,7 +331,6 @@ data PhyloEdge = PhyloEdge
, _phylo_edgeWeight :: Weight
} deriving (Show)
data PhyloNode = PhyloNode
{ _phylo_nodeId :: PhyloGroupId
, _phylo_nodeBranchId :: Maybe PhyloBranchId
......@@ -279,28 +342,13 @@ data PhyloNode = PhyloNode
, _phylo_nodeChilds :: [PhyloNode]
} deriving (Show)
------------------------
-- | PhyloQueryView | --
------------------------
-- | Post reconstruction treatments
data Filter = LonelyBranch
data Metric = BranchAge
data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
data Sort = ByBranchAge
data Order = Asc | Desc
data DisplayMode = Flat | Nested
-- | A constructor for filters
data QueryFilter = QueryFilter
{ _qf_name :: Filter
, _qf_pNum :: Map Text Double
, _qf_pBool :: Map Text Bool
}
-- | A PhyloQueryView is the structured representation of a user query to be applied to a Phylo
-- | A PhyloQueryView describes a Phylo as an output view
data PhyloQueryView = PhyloQueryView
{ _qv_lvl :: Level
......@@ -314,7 +362,7 @@ data PhyloQueryView = PhyloQueryView
-- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
-- Firstly the metrics, then the filters and the taggers
, _qv_metrics :: [Metric]
, _qv_filters :: [QueryFilter]
, _qv_filters :: [Filter]
, _qv_taggers :: [Tagger]
-- An asc or desc sort to apply to the PhyloGraph
......@@ -325,30 +373,35 @@ data PhyloQueryView = PhyloQueryView
, _qv_verbose :: Bool
}
----------------
-- | Lenses | --
----------------
------------------------------------------------------------------------
-- | Lenses and Json | --
-- | Lenses
makeLenses ''Phylo
makeLenses ''PhyloParam
makeLenses ''PhyloExport
makeLenses ''Software
--
makeLenses ''Phylo
makeLenses ''PhyloGroup
makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod
makeLenses ''PhyloView
--
makeLenses ''Proximity
makeLenses ''Cluster
makeLenses ''Filter
--
makeLenses ''PhyloQuery
makeLenses ''PhyloQueryView
--
makeLenses ''PhyloView
makeLenses ''PhyloBranch
makeLenses ''PhyloNode
makeLenses ''PhyloEdge
makeLenses ''QueryProximity
makeLenses ''QueryClustering
makeLenses ''QueryFilter
makeLenses ''PhyloQuery
-- | JSON instances
------------------------
-- | JSON instances | --
------------------------
$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
......@@ -358,10 +411,18 @@ $(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
$(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
$(deriveJSON (unPrefix "_qc_" ) ''QueryClustering )
$(deriveJSON (unPrefix "_qp_" ) ''QueryProximity )
$(deriveJSON (unPrefix "") ''Proximity )
$(deriveJSON (unPrefix "") ''Clustering )
-- | TODO XML instances
$(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity )
--
$(deriveJSON (unPrefix "_fis_" ) ''FisParams )
$(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
----------------------------
-- | TODO XML instances | --
----------------------------
......@@ -37,14 +37,14 @@ import qualified Data.Set as Set
-- | To apply a Clustering method to a PhyloGraph
graphToClusters :: QueryClustering -> GroupGraph -> [Cluster]
graphToClusters clust (nodes,edges) = case clust ^. qc_name of
Louvain -> undefined -- louvain (nodes,edges)
RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined -- louvain (nodes,edges)
RelatedComponents (RCParams _) -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> QueryProximity -> QueryClustering -> Phylo -> Map (Date,Date) [Cluster]
phyloToClusters :: Level -> Proximity -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl prox clus p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph prox (getGroupsWithFilters lvl prd p) p
......
......@@ -33,7 +33,7 @@ import qualified Data.Set as Set
-- | To transform the Fis into a coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [Fis] -> Phylo -> Map (Int, Int) Double
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
fisToCooc m p = map (/docs)
$ foldl (\mem x -> adjust (+1) (getKeyPair x mem) mem) cooc
$ concat
......
......@@ -36,24 +36,24 @@ import qualified Data.Vector as Vector
-- | To Filter Fis by support
filterFisBySupport :: Bool -> Int -> Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisBySupport empty min m = case empty of
True -> Map.map (\l -> filterMinorFis min l) m
False -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
filterFisBySupport :: Bool -> Int -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisBySupport keep min m = case keep of
False -> Map.map (\l -> filterMinorFis min l) m
True -> Map.map (\l -> keepFilled (filterMinorFis) min l) m
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport False
filterMinorFis :: Int -> [Fis] -> [Fis]
-- | To filter Fis with small Support, to preserve nonempty periods please use : filterFisBySupport true
filterMinorFis :: Int -> [PhyloFis] -> [PhyloFis]
filterMinorFis min l = filter (\fis -> snd fis > min) l
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [Fis] -> Map (Date, Date) [Fis]
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head $ map fst l) (map fst l) []
in filter (\fis -> elem (fst fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [Fis]
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis docs = map (\d -> Map.toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (words . text) d)) docs
\ No newline at end of file
......@@ -44,21 +44,20 @@ graphToBranches lvl (nodes,edges) p = concat
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: QueryProximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph :: Proximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph prox groups p = (groups,edges)
where
edges :: GroupEdges
edges = case prox ^. qp_name of
edges = case prox of
Filiation -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard -> filter (\edge -> snd edge >= (fromJust (prox ^. qp_threshold)))
$ map (\(x,y) -> ((x,y), weightedLogJaccard
(getSensibility prox) (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
Hamming -> filter (\edge -> snd edge <= (fromJust (prox ^. qp_threshold)))
$ map (\(x,y) -> ((x,y), hamming (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
WeightedLogJaccard (WLJParams thr sens) -> filter (\edge -> snd edge >= thr)
$ map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y))))
$ listToDirectedCombi groups
Hamming (HammingParams thr) -> filter (\edge -> snd edge <= thr)
$ map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y))))
$ listToDirectedCombi groups
_ -> undefined
......@@ -72,5 +71,5 @@ setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst . head) $ fi
bs = graphToBranches lvl graph p
--------------------------------------
graph :: GroupGraph
graph = groupsToGraph (QueryProximity Filiation empty Nothing) (getGroupsWithLevel lvl p) p
graph = groupsToGraph Filiation (getGroupsWithLevel lvl p) p
--------------------------------------
\ No newline at end of file
......@@ -94,16 +94,10 @@ urlPhyloQuery = "title=Cesar et Cleôpatre&description=An example of Phylomemy (
phyloQuery :: PhyloQuery
phyloQuery = PhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3
(QueryClustering FrequentItemSet
(singleton "supportInf" 1)
(Map.fromList [("filterFis",True),("emptyFis",False)])
Nothing)
(QueryProximity WeightedLogJaccard
(singleton "sensibility" 0) (Just 0.01))
defaultFis
defaultWeightedLogJaccard
2
(QueryClustering RelatedComponents
empty empty
(Just (QueryProximity Filiation empty Nothing)))
defaultRelatedComponents
------------------------------------------------------------------------
......@@ -114,7 +108,7 @@ phyloQuery = PhyloQuery "Cesar et Cleôpatre" "An example of Phylomemy (french w
urlToQuery :: Text -> PhyloQueryView
urlToQuery url = defaultQuery
& qv_metrics %~ (++ [BranchAge])
& qv_filters %~ (++ [QueryFilter LonelyBranch (Map.fromList [("nbInf",2),("nbSup",2),("nbNs",1)]) empty])
& qv_filters %~ (++ [defaultLonelyBranch])
& qv_taggers %~ (++ [BranchLabelFreq,GroupLabelCooc])
......@@ -139,16 +133,16 @@ phyloView = toPhyloView urlQuery phylo6
phylo6 :: Phylo
phylo6 = toNthLevel 6 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) (QueryClustering RelatedComponents empty empty (Just (QueryProximity Filiation empty Nothing))) phylo3
phylo6 = toNthLevel 6 defaultWeightedLogJaccard defaultRelatedComponents phylo3
phylo3 :: Phylo
phylo3 = setPhyloBranches 3
$ interTempoMatching Descendant 3 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01))
$ interTempoMatching Ascendant 3 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01))
$ interTempoMatching Descendant 3 defaultWeightedLogJaccard
$ interTempoMatching Ascendant 3 defaultWeightedLogJaccard
$ setLevelLinks (2,3)
$ addPhyloLevel 3
(phyloToClusters 2 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) (QueryClustering RelatedComponents empty empty (Just (QueryProximity Filiation empty Nothing))) phyloBranch2)
(phyloToClusters 2 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch2)
phyloBranch2
......@@ -160,11 +154,11 @@ phyloBranch2 = setPhyloBranches 2 phylo2_c
phylo2_c :: Phylo
phylo2_c = interTempoMatching Descendant 2 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) phylo2_p
phylo2_c = interTempoMatching Descendant 2 defaultWeightedLogJaccard phylo2_p
phylo2_p :: Phylo
phylo2_p = interTempoMatching Ascendant 2 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) phylo2_1_2
phylo2_p = interTempoMatching Ascendant 2 defaultWeightedLogJaccard phylo2_1_2
phylo2_1_2 :: Phylo
......@@ -176,8 +170,8 @@ phylo2 :: Phylo
phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [Cluster]
phyloCluster = phyloToClusters 1 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) (QueryClustering RelatedComponents empty empty (Just (QueryProximity Filiation empty Nothing))) phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch1
------------------------------------------------------------------------
......@@ -193,11 +187,11 @@ phyloBranch1 = setPhyloBranches 1 phylo1_c
phylo1_c :: Phylo
phylo1_c = interTempoMatching Descendant 1 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) phylo1_p
phylo1_c = interTempoMatching Descendant 1 defaultWeightedLogJaccard phylo1_p
phylo1_p :: Phylo
phylo1_p = interTempoMatching Ascendant 1 (QueryProximity WeightedLogJaccard (singleton "sensibility" 0) (Just 0.01)) phylo1_0_1
phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
------------------------------------------------------------------------
......@@ -228,7 +222,7 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
phyloFis :: Map (Date, Date) [Fis]
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFisBySupport False 1 (filterFisByNested (docsToFis phyloDocs))
......
......@@ -51,7 +51,7 @@ class PhyloLevelMaker aggregate
toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
instance PhyloLevelMaker Cluster
instance PhyloLevelMaker PhyloCluster
where
--------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
......@@ -64,7 +64,7 @@ instance PhyloLevelMaker Cluster
--------------------------------------
instance PhyloLevelMaker Fis
instance PhyloLevelMaker PhyloFis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
......@@ -94,7 +94,7 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> Cluster -> Map (Date,Date) [Cluster] -> Phylo -> PhyloGroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster] -> Phylo -> PhyloGroup
clusterToGroup prd lvl idx lbl groups m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams empty cooc Nothing [] [] [] (map (\g -> (getGroupId g, 1)) groups)
where
......@@ -111,7 +111,7 @@ clusterToGroup prd lvl idx lbl groups m p =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> (Clique,Support) -> Map (Date, Date) [Fis] -> Phylo -> PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> (Clique,Support) -> Map (Date, Date) [PhyloFis] -> Phylo -> PhyloGroup
cliqueToGroup prd lvl idx lbl fis m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams (singleton "support" (fromIntegral $ snd fis)) cooc Nothing [] [] [] []
where
......@@ -155,7 +155,7 @@ initPhylo g s c a f = addPhyloLevel 0 (corpusToDocs f c base) base
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
toNthLevel :: Level -> QueryProximity -> QueryClustering -> Phylo -> Phylo
toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
......@@ -164,7 +164,7 @@ toNthLevel lvlMax prox clus p
$ interTempoMatching Ascendant (lvl + 1) prox
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(phyloToClusters lvl (fromJust $ clus ^. qc_proximity) clus p) p
(phyloToClusters lvl (getProximity clus) clus p) p
where
--------------------------------------
lvl :: Level
......@@ -172,22 +172,24 @@ toNthLevel lvlMax prox clus p
--------------------------------------
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Methods
toPhylo1 :: QueryClustering -> QueryProximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clst proxy d p = case getClusterName clst of
FrequentItemSet -> setPhyloBranches 1
$ interTempoMatching Descendant 1 proxy
$ interTempoMatching Ascendant 1 proxy
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p
where
--------------------------------------
phyloFis :: Map (Date, Date) [Fis]
phyloFis = filterFisBySupport (getClusterPBool clst "emptyFis") (round $ getClusterPNum clst "supportInf") (filterFisByNested (docsToFis d))
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.PhyloMaker.toPhylo1] fst clustering not recognized"
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams f k s) -> setPhyloBranches 1
$ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1)
$ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p
where
--------------------------------------
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = if f
then filterFisBySupport k s (filterFisByNested (docsToFis d))
else docsToFis d
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.PhyloMaker.toPhylo1] fst clustering not recognized"
-- | To reconstruct the Level 0 of a Phylo
......
......@@ -85,11 +85,11 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
-- | To apply the corresponding proximity function based on a given Proximity
getProximity :: QueryProximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity prox g1 g2 = case (prox ^. qp_name) of
WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (getSensibility prox) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
Hamming -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
_ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
applyProximity prox g1 g2 = case prox of
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2),weightedLogJaccard s (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
Hamming (HammingParams _) -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
......@@ -122,7 +122,7 @@ getNextPeriods to id l = case to of
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
findBestCandidates :: Filiation -> Int -> Int -> QueryProximity -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates to depth max prox group p
| depth > max || null next = []
| (not . null) best = take 2 best
......@@ -136,14 +136,14 @@ findBestCandidates to depth max prox group p
candidates = getGroupsWithFilters (getGroupLevel group) (head next) p
--------------------------------------
scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> getProximity prox group group') candidates
scores = map (\group' -> applyProximity prox group group') candidates
--------------------------------------
best :: [(PhyloGroupId, Double)]
best = reverse
$ sortOn snd
$ filter (\(id,score) -> case (prox ^. qp_name) of
WeightedLogJaccard -> score >= fromJust (prox ^. qp_threshold)
Hamming -> score <= fromJust (prox ^. qp_threshold)) scores
$ filter (\(id,score) -> case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr) scores
--------------------------------------
......@@ -161,7 +161,7 @@ makePair to group ids = case to of
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
interTempoMatching :: Filiation -> Level -> QueryProximity -> Phylo -> Phylo
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching to lvl prox p = alterPhyloGroups
(\groups ->
map (\group ->
......
......@@ -13,13 +13,14 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level)
import Data.List (filter, intersect, (++), sort, null, head, tail, last, tails, delete, nub, concat, union, sortOn)
import Data.Maybe (mapMaybe)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, elems, adjust, (!))
import Data.Set (Set)
import Data.Text (Text, toLower)
......@@ -139,42 +140,9 @@ getBranchMeta :: Text -> PhyloBranch -> Double
getBranchMeta k b = (b ^. phylo_branchMeta) ! k
-- | To get the Name of a Clustering Methods
getClusterName :: QueryClustering -> Clustering
getClusterName c = _qc_name c
-- | To get the params of a Clustering Methods
getClusterPNum :: QueryClustering -> Text -> Double
getClusterPNum c k = if (member k $ _qc_pNum c)
then (_qc_pNum c) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getClusterParam] the key is not in params"
-- | To get the boolean params of a Clustering Methods
getClusterPBool :: QueryClustering -> Text -> Bool
getClusterPBool c k = if (member k $ _qc_pBool c)
then (_qc_pBool c) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getClusterParamBool] the key is not in paramsBool"
-- | To get a numeric param from a given QueryFilter
getFilterPNum :: QueryFilter -> Text -> Double
getFilterPNum f k = if (member k $ f ^. qf_pNum)
then (f ^. qf_pNum) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getFilterPNum] the key is not in pNum"
-- | To get a boolean param from a given QueryFilter
getFilterPBool :: QueryFilter -> Text -> Bool
getFilterPBool f k = if (member k $ f ^. qf_pBool)
then (f ^. qf_pBool) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getFilterPBool] the key is not in pBool"
-- | To get the first clustering method to apply to get the level 1 of a Phylo
getFstCluster :: PhyloQuery -> QueryClustering
getFstCluster q = q ^. q_fstCluster
getFstCluster :: PhyloQuery -> Cluster
getFstCluster q = q ^. q_cluster
-- | To get the foundations of a Phylo
......@@ -394,7 +362,7 @@ getNodesInBranches v = filter (\n -> isJust $ n ^. phylo_nodeBranchId)
-- | To get the cluster methods to apply to the Nths levels of a Phylo
getNthCluster :: PhyloQuery -> QueryClustering
getNthCluster :: PhyloQuery -> Cluster
getNthCluster q = q ^. q_nthCluster
......@@ -424,13 +392,6 @@ getPhyloPeriodId :: PhyloPeriod -> PhyloPeriodId
getPhyloPeriodId prd = _phylo_periodId prd
-- | To get the sensibility of a Proximity if it exists
getSensibility :: QueryProximity -> Double
getSensibility prox = if (member "sensibility" $ prox ^. qp_pNum)
then (prox ^. qp_pNum) ! "sensibility"
else panic "[ERR][Viz.Phylo.Tools.getSensibility] sensibility not in params"
-- | To get the PhyloGroupId of the Source of a PhyloEdge
getSourceId :: PhyloEdge -> PhyloGroupId
getSourceId e = e ^. phylo_edgeSource
......@@ -447,7 +408,7 @@ getPeriodGrain q = q ^. q_periodGrain
-- | To get the intertemporal matching strategy to apply to a Phylo from a PhyloQuery
getInterTemporalMatching :: PhyloQuery -> QueryProximity
getInterTemporalMatching :: PhyloQuery -> Proximity
getInterTemporalMatching q = q ^. q_interTemporalMatching
......@@ -532,4 +493,74 @@ setPhyloLevelId lvl' (PhyloLevel (id, lvl) groups)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
then (y,x)
else (x,y) ) m1
\ No newline at end of file
else (x,y) ) m1
--------------------------------------------------
-- | PhyloQuery & PhyloQueryView Constructors | --
-- | Define a default value for each Proximity / Cluster
dft :: a -> Maybe a -> a
dft = fromMaybe
defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultHamming :: Proximity
defaultHamming = Hamming (initHamming Nothing)
defaultLonelyBranch = LonelyBranch (initLonelyBranch Nothing Nothing Nothing)
defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing)
defaultRelatedComponents :: Cluster
defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing)
defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing)
-- | To get the Proximity associated to a given Clustering method
getProximity :: Cluster -> Proximity
getProximity cluster = case cluster of
Louvain (LouvainParams proxi) -> proxi
RelatedComponents (RCParams proxi) -> proxi
_ -> panic "[ERR][Viz.Phylo.Tools.getProximity] this cluster has no associated Proximity"
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Bool -> Maybe Support -> FisParams
initFis (dft True -> flt) (dft True -> kmf) (dft 1 -> min) = FisParams flt kmf min
initHamming :: Maybe Double -> HammingParams
initHamming (dft 0.01 -> sens) = HammingParams sens
initLonelyBranch :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch (dft 2 -> periodsInf) (dft 2 -> periodsSup) (dft 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (dft defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
initRelatedComponents :: Maybe Proximity -> RCParams
initRelatedComponents (dft Filiation -> proxi) = RCParams proxi
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (dft 0 -> thr) (dft 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQuery from given and default parameters
initPhyloQuery :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQuery
initPhyloQuery name desc (dft 5 -> grain) (dft 3 -> steps) (dft defaultFis -> cluster)
(dft defaultWeightedLogJaccard -> matching) (dft 2 -> nthLevel) (dft defaultRelatedComponents -> nthCluster) =
PhyloQuery name desc grain steps cluster matching nthLevel nthCluster
-- | To define some obvious boolean getters
shouldFilterFis :: FisParams -> Bool
shouldFilterFis = _fis_filtered
shouldKeepMinorFis :: FisParams -> Bool
shouldKeepMinorFis = _fis_keepMinorFis
\ No newline at end of file
......@@ -61,7 +61,7 @@ cleanNodesEdges v v' = v' & phylo_viewNodes %~ (filter (\n -> not $ elem (getNod
-- | To filter all the lonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterLonelyBranch nbInf nbSup nbNs prds v = cleanNodesEdges v v'
filterLonelyBranch inf sup min prds v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
......@@ -71,20 +71,15 @@ filterLonelyBranch nbInf nbSup nbNs prds v = cleanNodesEdges v v'
in not (isLone ns prds')))
--------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= nbNs)
&& notElem (head prds') (take nbInf prds)
&& notElem (head prds') (take nbSup $ reverse prds)
isLone ns prds' = (length ns <= min)
&& notElem (head prds') (take inf prds)
&& notElem (head prds') (take sup $ reverse prds)
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [QueryFilter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f ^. qf_name of
LonelyBranch -> filterLonelyBranch (round $ getFilterPNum f "nbInf")
(round $ getFilterPNum f "nbSup")
(round $ getFilterPNum f "nbNs") (getPhyloPeriods p) v'
_ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found") v fs
\ No newline at end of file
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of
LonelyBranch (LBParams inf sup min) -> filterLonelyBranch inf sup min
(getPhyloPeriods p) v'
_ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found") v fs
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment