Commit eac6ceb5 authored by Quentin Lobbé's avatar Quentin Lobbé

refactoring Phylo.hs

parent c35221a6
Pipeline #312 failed with stage
......@@ -43,21 +43,6 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
------------------------------------------------------------------------
data PhyloQuery = PhyloQuery
{ _phyloQuery_phyloName :: Text
, _phyloQuery_phyloDescription :: Text
, _phyloQuery_timeGrain :: Int
, _phyloQuery_timeSteps :: Int
, _phyloQuery_fstCluster :: Clustering
, _phyloQuery_timeMatching :: Proximity
, _phyloQuery_nthLevel :: Level
, _phyloQuery_nthCluster :: Clustering
} deriving (Show)
data PhyloExport =
PhyloExport { _phyloExport_param :: PhyloParam
, _phyloExport_data :: Phylo
......@@ -201,21 +186,53 @@ data PhyloError = LevelDoesNotExist
deriving (Show)
-- | A List of Proximity methods names
data ProximityName = WeightedLogJaccard | Hamming | Filiation deriving (Show)
-- | A List of Clustering methods names
data ClusteringName = Louvain | RelatedComponents | FrequentItemSet deriving (Show)
------------------------------------------------------------------------
-- | To create a Phylo | --
-- | PhyloQuery | --
-- | A PhyloQuery is the structured representation of a user query to create a Phylo
data PhyloQuery = PhyloQuery
{ _q_phyloName :: Text
, _q_phyloDescription :: Text
-- Grain and Steps for seting up the periods
, _q_periodGrain :: Int
, _q_periodSteps :: Int
-- First clustering methods (ie: level 1)
, _q_fstCluster :: QueryClustering
-- Inter temporal matching method
, _q_interTemporalMatching :: QueryProximity
-- Level max of reconstruction of the Phylo && clustering methods to level max
, _q_nthLevel :: Level
, _q_nthCluster :: QueryClustering
} deriving (Show)
data Filiation = Ascendant | Descendant | Complete deriving (Show)
data EdgeType = PeriodEdge | LevelEdge deriving (Show)
-- | Reconstruction treatments
data Proximity = WeightedLogJaccard | Hamming | Filiation deriving (Show)
data Clustering = Louvain | RelatedComponents | FrequentItemSet deriving (Show)
-- | A constructor for Proximities
data Proximity = Proximity
{ _proximity_name :: ProximityName
, _proximity_params :: Map Text Double
, _proximity_threshold :: Maybe Double } deriving (Show)
data QueryProximity = QueryProximity
{ _qp_name :: Proximity
, _qp_pNum :: Map Text Double
, _qp_threshold :: Maybe Double } deriving (Show)
-- | A constructor for Clustering
data Clustering = Clustering
{ _clustering_name :: ClusteringName
, _clustering_params :: Map Text Double
, _clustering_paramsBool :: Map Text Bool
, _clustering_proximity :: Maybe Proximity } deriving (Show)
data QueryClustering = QueryClustering
{ _qc_name :: Clustering
, _qc_pNum :: Map Text Double
, _qc_pBool :: Map Text Bool
, _qc_proximity :: Maybe QueryProximity } deriving (Show)
------------------------------------------------------------------------
-- | To export a Phylo | --
......@@ -224,9 +241,6 @@ data Clustering = Clustering
-- | PhyloView | --
data Filiation = Ascendant | Descendant | Complete deriving (Show)
data EdgeType = PeriodEdge | LevelEdge deriving (Show)
data PhyloView = PhyloView
{ _phylo_viewParam :: PhyloParam
, _phylo_viewLabel :: Text
......@@ -265,50 +279,50 @@ data PhyloNode = PhyloNode
, _phylo_nodeChilds :: [PhyloNode]
} deriving (Show)
-- | PhyloQuery | --
-- | PhyloQueryView | --
-- | Post reconstruction treatments
data Filter = LonelyBranch
data Metric = BranchAge
data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
data Sort = ByBranchAge
data Order = Asc | Desc
data DisplayMode = Flat | Nested
-- | A query filter seen as : prefix && ((filter params)(clause))
-- | A constructor for filters
data QueryFilter = QueryFilter
{ _query_filter :: Filter
, _query_params :: [Double]
{ _qf_name :: Filter
, _qf_pNum :: Map Text Double
, _qf_pBool :: Map Text Bool
}
-- | A PhyloQueryView is the structured representation of a user query to be applied to a Phylo
data PhyloQueryView = PhyloQueryView
{ _query_lvl :: Level
{ _qv_lvl :: Level
-- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
, _query_filiation :: Filiation
, _qv_filiation :: Filiation
-- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
, _query_childs :: Bool
, _query_childsDepth :: Level
, _qv_childs :: Bool
, _qv_childsDepth :: Level
-- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
-- Firstly the metrics, then the filters and the taggers
, _query_metrics :: [Metric]
, _query_filters :: [QueryFilter]
, _query_taggers :: [Tagger]
, _qv_metrics :: [Metric]
, _qv_filters :: [QueryFilter]
, _qv_taggers :: [Tagger]
-- An asc or desc sort to apply to the PhyloGraph
, _query_sort :: Maybe (Sort,Order)
, _qv_sort :: Maybe (Sort,Order)
-- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
, _query_display :: DisplayMode
, _query_verbose :: Bool
, _qv_display :: DisplayMode
, _qv_verbose :: Bool
}
......@@ -329,8 +343,8 @@ makeLenses ''PhyloQueryView
makeLenses ''PhyloBranch
makeLenses ''PhyloNode
makeLenses ''PhyloEdge
makeLenses ''Proximity
makeLenses ''Clustering
makeLenses ''QueryProximity
makeLenses ''QueryClustering
makeLenses ''QueryFilter
makeLenses ''PhyloQuery
......@@ -342,12 +356,12 @@ $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
--
$(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
$(deriveJSON (unPrefix "_clustering_" ) ''Clustering )
$(deriveJSON (unPrefix "_proximity_" ) ''Proximity )
$(deriveJSON (unPrefix "") ''ProximityName )
$(deriveJSON (unPrefix "") ''ClusteringName )
$(deriveJSON (unPrefix "_phyloQuery_" ) ''PhyloQuery )
$(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQuery )
$(deriveJSON (unPrefix "_qc_" ) ''QueryClustering )
$(deriveJSON (unPrefix "_qp_" ) ''QueryProximity )
$(deriveJSON (unPrefix "") ''Proximity )
$(deriveJSON (unPrefix "") ''Clustering )
-- | TODO XML instances
......@@ -37,14 +37,14 @@ import qualified Data.Set as Set
-- | To apply a Clustering method to a PhyloGraph
graphToClusters :: Clustering -> GroupGraph -> [Cluster]
graphToClusters clust (nodes,edges) = case clust ^. clustering_name of
graphToClusters :: QueryClustering -> GroupGraph -> [Cluster]
graphToClusters clust (nodes,edges) = case clust ^. qc_name of
Louvain -> undefined -- louvain (nodes,edges)
RelatedComponents -> relatedComp 0 (head nodes) (tail nodes,edges) [] []
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> Proximity -> Clustering -> Phylo -> Map (Date,Date) [Cluster]
phyloToClusters :: Level -> QueryProximity -> QueryClustering -> Phylo -> Map (Date,Date) [Cluster]
phyloToClusters lvl prox clus p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph prox (getGroupsWithFilters lvl prd p) p
......
......@@ -44,19 +44,19 @@ graphToBranches lvl (nodes,edges) p = concat
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: Proximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph :: QueryProximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph prox groups p = (groups,edges)
where
edges :: GroupEdges
edges = case prox ^. proximity_name of
edges = case prox ^. qp_name of
Filiation -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard -> filter (\edge -> snd edge >= (fromJust (prox ^. proximity_threshold)))
WeightedLogJaccard -> filter (\edge -> snd edge >= (fromJust (prox ^. qp_threshold)))
$ map (\(x,y) -> ((x,y), weightedLogJaccard
(getSensibility prox) (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
Hamming -> filter (\edge -> snd edge <= (fromJust (prox ^. proximity_threshold)))
Hamming -> filter (\edge -> snd edge <= (fromJust (prox ^. qp_threshold)))
$ map (\(x,y) -> ((x,y), hamming (getGroupCooc x)
(unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ listToDirectedCombi groups
_ -> undefined
......@@ -72,5 +72,5 @@ setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst . head) $ fi
bs = graphToBranches lvl graph p
--------------------------------------
graph :: GroupGraph
graph = groupsToGraph (Proximity Filiation empty Nothing) (getGroupsWithLevel lvl p) p
graph = groupsToGraph (QueryProximity Filiation empty Nothing) (getGroupsWithLevel lvl p) p
--------------------------------------
\ No newline at end of file
This diff is collapsed.
......@@ -155,7 +155,7 @@ initPhylo g s c a f = addPhyloLevel 0 (corpusToDocs f c base) base
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
toNthLevel :: Level -> Proximity -> Clustering -> Phylo -> Phylo
toNthLevel :: Level -> QueryProximity -> QueryClustering -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
......@@ -164,7 +164,7 @@ toNthLevel lvlMax prox clus p
$ interTempoMatching Ascendant (lvl + 1) prox
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(phyloToClusters lvl (fromJust $ clus ^. clustering_proximity) clus p) p
(phyloToClusters lvl (fromJust $ clus ^. qc_proximity) clus p) p
where
--------------------------------------
lvl :: Level
......@@ -173,7 +173,7 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Methods
toPhylo1 :: Clustering -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 :: QueryClustering -> QueryProximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clst proxy d p = case getClusterName clst of
FrequentItemSet -> setPhyloBranches 1
$ interTempoMatching Descendant 1 proxy
......@@ -184,7 +184,7 @@ toPhylo1 clst proxy d p = case getClusterName clst of
where
--------------------------------------
phyloFis :: Map (Date, Date) [Fis]
phyloFis = filterFisBySupport (getClusterParamBool clst "emptyFis") (round $ getClusterParam clst "supportInf") (filterFisByNested (docsToFis d))
phyloFis = filterFisBySupport (getClusterPBool clst "emptyFis") (round $ getClusterPNum clst "supportInf") (filterFisByNested (docsToFis d))
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.PhyloMaker.toPhylo1] fst clustering not recognized"
......@@ -201,7 +201,7 @@ toPhyloBase q c a = initPhyloBase periods foundations
where
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getTimeGrain q) (getTimeSteps q)
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both fst (head c,last c)
--------------------------------------
foundations :: Vector Ngrams
......@@ -211,11 +211,11 @@ toPhyloBase q c a = initPhyloBase periods foundations
-- | To reconstruct a Phylomemy from a PhyloQuery, a Corpus and a list of actants
toPhylo :: PhyloQuery -> [(Date, Text)] -> [Ngrams] -> Phylo
toPhylo q c a = toNthLevel (getNthLevel q) (getTimeMatching q) (getNthCluster q) phylo1
toPhylo q c a = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getFstCluster q) (getTimeMatching q) phyloDocs phylo0
phylo1 = toPhylo1 (getFstCluster q) (getInterTemporalMatching q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
......
......@@ -85,8 +85,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (linkGroupsByLevel (lvl,lvl') p) p
-- | To apply the corresponding proximity function based on a given Proximity
getProximity :: Proximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity prox g1 g2 = case (prox ^. proximity_name) of
getProximity :: QueryProximity -> PhyloGroup -> PhyloGroup -> (PhyloGroupId, Double)
getProximity prox g1 g2 = case (prox ^. qp_name) of
WeightedLogJaccard -> ((getGroupId g2),weightedLogJaccard (getSensibility prox) (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
Hamming -> ((getGroupId g2),hamming (getGroupCooc g1) (unifySharedKeys (getGroupCooc g2) (getGroupCooc g1)))
_ -> panic ("[ERR][Viz.Phylo.Example.getProximity] Proximity function not defined")
......@@ -122,7 +122,7 @@ getNextPeriods to id l = case to of
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units )
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates :: Filiation -> Int -> Int -> QueryProximity -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)]
findBestCandidates to depth max prox group p
| depth > max || null next = []
| (not . null) best = take 2 best
......@@ -141,9 +141,9 @@ findBestCandidates to depth max prox group p
best :: [(PhyloGroupId, Double)]
best = reverse
$ sortOn snd
$ filter (\(id,score) -> case (prox ^. proximity_name) of
WeightedLogJaccard -> score >= fromJust (prox ^. proximity_threshold)
Hamming -> score <= fromJust (prox ^. proximity_threshold)) scores
$ filter (\(id,score) -> case (prox ^. qp_name) of
WeightedLogJaccard -> score >= fromJust (prox ^. qp_threshold)
Hamming -> score <= fromJust (prox ^. qp_threshold)) scores
--------------------------------------
......@@ -161,7 +161,7 @@ makePair to group ids = case to of
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching :: Filiation -> Level -> QueryProximity -> Phylo -> Phylo
interTempoMatching to lvl prox p = alterPhyloGroups
(\groups ->
map (\group ->
......
......@@ -140,27 +140,41 @@ getBranchMeta k b = (b ^. phylo_branchMeta) ! k
-- | To get the Name of a Clustering Methods
getClusterName :: Clustering -> ClusteringName
getClusterName c = _clustering_name c
getClusterName :: QueryClustering -> Clustering
getClusterName c = _qc_name c
-- | To get the params of a Clustering Methods
getClusterParam :: Clustering -> Text -> Double
getClusterParam c k = if (member k $ _clustering_params c)
then (_clustering_params c) Map.! k
getClusterPNum :: QueryClustering -> Text -> Double
getClusterPNum c k = if (member k $ _qc_pNum c)
then (_qc_pNum c) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getClusterParam] the key is not in params"
-- | To get the boolean params of a Clustering Methods
getClusterParamBool :: Clustering -> Text -> Bool
getClusterParamBool c k = if (member k $ _clustering_paramsBool c)
then (_clustering_paramsBool c) Map.! k
getClusterPBool :: QueryClustering -> Text -> Bool
getClusterPBool c k = if (member k $ _qc_pBool c)
then (_qc_pBool c) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getClusterParamBool] the key is not in paramsBool"
-- | To get a numeric param from a given QueryFilter
getFilterPNum :: QueryFilter -> Text -> Double
getFilterPNum f k = if (member k $ f ^. qf_pNum)
then (f ^. qf_pNum) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getFilterPNum] the key is not in pNum"
-- | To get a boolean param from a given QueryFilter
getFilterPBool :: QueryFilter -> Text -> Bool
getFilterPBool f k = if (member k $ f ^. qf_pBool)
then (f ^. qf_pBool) Map.! k
else panic "[ERR][Viz.Phylo.Tools.getFilterPBool] the key is not in pBool"
-- | To get the first clustering method to apply to get the level 1 of a Phylo
getFstCluster :: PhyloQuery -> Clustering
getFstCluster q = q ^. phyloQuery_fstCluster
getFstCluster :: PhyloQuery -> QueryClustering
getFstCluster q = q ^. q_fstCluster
-- | To get the foundations of a Phylo
......@@ -380,13 +394,13 @@ getNodesInBranches v = filter (\n -> isJust $ n ^. phylo_nodeBranchId)
-- | To get the cluster methods to apply to the Nths levels of a Phylo
getNthCluster :: PhyloQuery -> Clustering
getNthCluster q = q ^. phyloQuery_nthCluster
getNthCluster :: PhyloQuery -> QueryClustering
getNthCluster q = q ^. q_nthCluster
-- | To get the Sup Level of a reconstruction of a Phylo from a PhyloQuery
getNthLevel :: PhyloQuery -> Level
getNthLevel q = q ^. phyloQuery_nthLevel
getNthLevel q = q ^. q_nthLevel
-- | To get the PhylolevelId of a given PhyloLevel
......@@ -411,9 +425,9 @@ getPhyloPeriodId prd = _phylo_periodId prd
-- | To get the sensibility of a Proximity if it exists
getSensibility :: Proximity -> Double
getSensibility prox = if (member "sensibility" $ prox ^. proximity_params)
then (prox ^. proximity_params) ! "sensibility"
getSensibility :: QueryProximity -> Double
getSensibility prox = if (member "sensibility" $ prox ^. qp_pNum)
then (prox ^. qp_pNum) ! "sensibility"
else panic "[ERR][Viz.Phylo.Tools.getSensibility] sensibility not in params"
......@@ -428,18 +442,18 @@ getTargetId e = e ^. phylo_edgeTarget
-- | To get the Grain of the PhyloPeriods from a PhyloQuery
getTimeGrain :: PhyloQuery -> Int
getTimeGrain q = q ^. phyloQuery_timeGrain
getPeriodGrain :: PhyloQuery -> Int
getPeriodGrain q = q ^. q_periodGrain
-- | To get the intertemporal matching strategy to apply to a Phylo from a PhyloQuery
getTimeMatching :: PhyloQuery -> Proximity
getTimeMatching q = q ^. phyloQuery_timeMatching
getInterTemporalMatching :: PhyloQuery -> QueryProximity
getInterTemporalMatching q = q ^. q_interTemporalMatching
-- | To get the Steps of the PhyloPeriods from a PhyloQuery
getTimeSteps :: PhyloQuery -> Int
getTimeSteps q = q ^. phyloQuery_timeSteps
getPeriodSteps :: PhyloQuery -> Int
getPeriodSteps q = q ^. q_periodSteps
-- | To get all the PhyloBranchIds of a PhyloView
......
......@@ -77,10 +77,14 @@ filterLonelyBranch nbInf nbSup nbNs prds v = cleanNodesEdges v v'
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [QueryFilter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f ^. query_filter of
LonelyBranch -> filterLonelyBranch (round $ (f ^. query_params) !! 0)
(round $ (f ^. query_params) !! 1)
(round $ (f ^. query_params) !! 2) (getPhyloPeriods p) v'
processFilters fs p v = foldl (\v' f -> case f ^. qf_name of
LonelyBranch -> filterLonelyBranch (round $ getFilterPNum f "nbInf")
(round $ getFilterPNum f "nbSup")
(round $ getFilterPNum f "nbNs") (getPhyloPeriods p) v'
_ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found") v fs
\ No newline at end of file
......@@ -126,13 +126,13 @@ addChildNodes shouldDo lvl lvlMin vb fl p v =
-- | To transform a PhyloQuery into a PhyloView
queryToView :: PhyloQueryView -> Phylo -> PhyloView
queryToView q p = processDisplay (q ^. query_display)
$ processSort (q ^. query_sort) p
$ processTaggers (q ^. query_taggers) p
$ processFilters (q ^. query_filters) p
$ processMetrics (q ^. query_metrics) p
$ addChildNodes (q ^. query_childs) (q ^. query_lvl) (q ^. query_childsDepth) (q ^. query_verbose) (q ^. query_filiation) p
$ initPhyloView (q ^. query_lvl) "Phylo2000" "This is a Phylo" (q ^. query_filiation) (q ^. query_verbose) p
queryToView q p = processDisplay (q ^. qv_display)
$ processSort (q ^. qv_sort) p
$ processTaggers (q ^. qv_taggers) p
$ processFilters (q ^. qv_filters) p
$ processMetrics (q ^. qv_metrics) p
$ addChildNodes (q ^. qv_childs) (q ^. qv_lvl) (q ^. qv_childsDepth) (q ^. qv_verbose) (q ^. qv_filiation) p
$ initPhyloView (q ^. qv_lvl) "Phylo2000" "This is a Phylo" (q ^. qv_filiation) (q ^. qv_verbose) p
-- | dirty params
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment