Commit 0f6c2e5b authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev

parents 706dd25d 148b0dcc
......@@ -170,14 +170,25 @@ main = do
let sensibility = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> (show s)
WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-level_" <> (show (phyloLevel config))
<> "-" <> clq
<> "-sens_" <> sensibility
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot"
dotToFile output dot
......@@ -20,10 +20,9 @@ import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -31,7 +30,7 @@ import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
{-
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
......@@ -45,6 +44,7 @@ data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
data StopSize = StopSize {unStopSize :: !Int}
......@@ -52,19 +52,19 @@ data StopSize = StopSize {unStopSize :: !Int}
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
......@@ -72,12 +72,18 @@ ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------------
------------------------------------------------------------------------
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
------------------------------------------------------------------------
toGroupedText :: Ord b
=> (Text -> Text)
-> (a -> b)
-> (a -> Set Text)
-> (a -> Set NodeId)
=> (Text -> Text )
-> (a -> b )
-> (a -> Set Text )
-> (a -> Set NodeId)
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
......@@ -108,7 +114,7 @@ groupStems' = Map.fromListWith grouping
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------------
------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
......@@ -116,15 +122,15 @@ data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label
, _gt_score :: !score
, _gt_group :: !(Set Text)
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem
, _gt_nodes :: !(Set NodeId)
} deriving Show
{-
} {-deriving Show--}
--{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
-}
--}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
......@@ -137,18 +143,13 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
-- Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------------
------------------------------------------------------------------------
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
......@@ -58,8 +58,8 @@ buildNgramsLists :: ( RepoCmdM env err m
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity))
[ (Authors, MapListSize 9)
, (Sources, MapListSize 9)
[ (Authors , MapListSize 9)
, (Sources , MapListSize 9)
, (Institutes, MapListSize 9)
]
......@@ -83,12 +83,13 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
let
grouped = toGroupedText groupIt (Set.size . snd) fst snd (Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
grouped = toGroupedText groupIt (Set.size . snd) fst snd
(Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
socialLists <- flowSocialList user nt (Set.fromList $ Map.keys ngs)
let
groupedWithList = map (addListType (invertForw socialLists)) grouped
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(mapTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
......
......@@ -35,6 +35,7 @@ import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
flowSocialList :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
......@@ -56,7 +57,6 @@ flowSocialList user nt ngrams' = do
-- printDebug "* socialLists *: results \n" result
pure result
------------------------------------------------------------------------
unions :: (Ord a, Semigroup a, Semigroup b, Ord b)
=> [Map a (Set b)] -> Map a (Set b)
......@@ -75,13 +75,12 @@ invertBack = Map.fromListWith (<>)
unions_test :: Map ListType (Set Text)
unions_test = unions [m1, m2]
where
m1 = Map.fromList [ (StopTerm, Set.singleton "Candidate")]
m1 = Map.fromList [ (StopTerm , Set.singleton "Candidate")]
m2 = Map.fromList [ (CandidateTerm, Set.singleton "Candidate")
, (MapTerm, Set.singleton "Candidate")
, (MapTerm , Set.singleton "Candidate")
]
------------------------------------------------------------------------
termsByList :: ListType -> (Map (Maybe ListType) (Set Text)) -> Set Text
termsByList CandidateTerm m = Set.unions
$ map (\lt -> fromMaybe Set.empty $ Map.lookup lt m)
......@@ -108,7 +107,7 @@ flowSocialListByMode mode user nt ngrams' = do
-- printDebug "flowSocialListByMode r" r
pure r
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- TODO: maybe use social groups too
toSocialList :: Map Text (Map ListType Int)
-> Set Text
......@@ -141,7 +140,7 @@ toSocialList1_testIsTrue = result == (Just MapTerm, Set.singleton token)
, (StopTerm , 3)
]
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists
-- here we need UserList only
countFilterList :: RepoCmdM env err m
......@@ -161,14 +160,83 @@ countFilterList' st nt ls input = do
-- printDebug "countFilterList'" ml
pure $ Set.foldl' (\m t -> countList t ml m) input st
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType
toMapTextListType m = Map.fromListWith (<>)
$ List.concat
$ (map (toList m))
$ Map.toList m
$ List.concat
$ map (toList m)
$ Map.toList m
----------------------
-- | Tools to inherit groupings
----------------------
type Parent = Text
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> (fst . fst) <$> Map.maxViewWithKey m'
toMapTextParent :: Set Text
-> Map Text (Map Parent Int)
-> [Map Text NgramsRepoElement]
-> Map Text (Map Parent Int)
toMapTextParent ts = foldl' (toMapTextParent' ts)
where
toMapTextParent' :: Set Text
-> Map Text (Map Parent Int)
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
toMapTextParent' ts' to from = Set.foldl' (toMapTextParent'' ts' from) to ts'
toMapTextParent'' :: Set Text
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
-> Text
-> Map Text (Map Parent Int)
toMapTextParent'' ss from to t = case Map.lookup t from of
Nothing -> to
Just nre -> case _nre_parent nre of
Just (NgramsTerm p') -> if Set.member p' ss
then Map.alter (addParent p') t to
else to
where
addParent p'' Nothing = Just $ addCountParent p'' Map.empty
addParent p'' (Just ps) = Just $ addCountParent p'' ps
addCountParent :: Parent -> Map Parent Int -> Map Parent Int
addCountParent p m = Map.alter addCount p m
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
_ -> to
------------------------------------------------------------------------
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)]
toList m (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) =
List.zip terms (List.cycle [lt'])
......@@ -184,9 +252,10 @@ listOf m ng = case _nre_parent ng of
Nothing -> _nre_list ng
Just p -> case Map.lookup (unNgramsTerm p) m of
Just ng' -> listOf m ng'
Nothing -> panic "CandidateTerm -- Should Not happen"
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
---------------------------------------------------------------------------
------------------------------------------------------------------------
countList :: Text
-> Map Text ListType
-> Map Text (Map ListType Int)
......@@ -195,11 +264,11 @@ countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCount l Map.empty
addList (Just lm) = Just $ addCount l lm
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCount :: ListType -> Map ListType Int -> Map ListType Int
addCount l m = Map.alter (plus l) l m
addCountList :: ListType -> Map ListType Int -> Map ListType Int
addCountList l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
......@@ -228,5 +297,3 @@ findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
......@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc'
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
......@@ -47,16 +47,16 @@ import Gargantext.Core.Text.Context (TermList)
-- | Global parameters of a Phylo
data PhyloParam =
PhyloParam { _phyloParam_version :: Text -- Double ?
, _phyloParam_software :: Software
, _phyloParam_query :: PhyloQueryBuild
PhyloParam { _phyloParam_version :: !Text -- Double ?
, _phyloParam_software :: !Software
, _phyloParam_query :: !PhyloQueryBuild
} deriving (Generic, Show, Eq)
-- | Software parameters
data Software =
Software { _software_name :: Text
, _software_version :: Text
Software { _software_name :: !Text
, _software_version :: !Text
} deriving (Generic, Show, Eq)
......@@ -70,21 +70,21 @@ data Software =
-- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
-- Periods : list of all the periods of a Phylo
data Phylo =
Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations
Phylo { _phylo_duration :: !(Start, End)
, _phylo_foundations :: !PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_docsByYears :: !(Map Date Double)
, _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
, _phylo_fis :: !(Map (Date,Date) [PhyloFis])
, _phylo_param :: PhyloParam
, _phylo_param :: !PhyloParam
}
deriving (Generic, Show, Eq)
-- | The foundations of a phylomemy created from a given TermList
data PhyloFoundations =
PhyloFoundations { _phylo_foundationsRoots :: Vector Ngrams
, _phylo_foundationsTermsList :: TermList
PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
, _phylo_foundationsTermsList :: !TermList
} deriving (Generic, Show, Eq)
......@@ -107,8 +107,8 @@ type End = Date
-- Period: tuple (start date, end date) of the step of the phylomemy
-- Levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodId :: PhyloPeriodId
, _phylo_periodLevels :: [PhyloLevel]
PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
, _phylo_periodLevels :: ![PhyloLevel]
}
deriving (Generic, Show, Eq)
......@@ -125,8 +125,8 @@ data PhyloPeriod =
-- Level 1: First level of clustering
-- Level N: Nth level of clustering
data PhyloLevel =
PhyloLevel { _phylo_levelId :: PhyloLevelId
, _phylo_levelGroups :: [PhyloGroup]
PhyloLevel { _phylo_levelId :: !PhyloLevelId
, _phylo_levelGroups :: ![PhyloGroup]
}
deriving (Generic, Show, Eq)
......@@ -144,19 +144,19 @@ data PhyloLevel =
-- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
-- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
data PhyloGroup =
PhyloGroup { _phylo_groupId :: PhyloGroupId
, _phylo_groupLabel :: Text
, _phylo_groupNgrams :: [Int]
, _phylo_groupNgramsMeta :: Map Text [Double]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
PhyloGroup { _phylo_groupId :: !PhyloGroupId
, _phylo_groupLabel :: !Text
, _phylo_groupNgrams :: ![Int]
, _phylo_groupNgramsMeta :: !(Map Text [Double])
, _phylo_groupMeta :: !(Map Text Double)
, _phylo_groupBranchId :: !(Maybe PhyloBranchId)
, _phylo_groupCooc :: !(Map (Int,Int) Double)
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupPeriodParents :: ![Pointer]
, _phylo_groupPeriodChilds :: ![Pointer]
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupLevelParents :: ![Pointer]
, _phylo_groupLevelChilds :: ![Pointer]
}
deriving (Generic, NFData, Show, Eq, Ord)
......@@ -190,8 +190,8 @@ type Ngrams = Text
-- | Document : a piece of Text linked to a Date
data Document = Document
{ date :: Date
, text :: [Ngrams]
{ date :: !Date
, text :: ![Ngrams]
} deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document
......@@ -200,9 +200,9 @@ type Clique = Set Ngrams
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_period :: (Date,Date)
{ _phyloFis_clique :: !Clique
, _phyloFis_support :: !Support
, _phyloFis_period :: !(Date,Date)
} deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup
......@@ -240,18 +240,18 @@ data Cluster = Fis FisParams
-- | Parameters for Fis clustering
data FisParams = FisParams
{ _fis_keepMinorFis :: Bool
, _fis_minSupport :: Support
, _fis_minSize :: Int
{ _fis_keepMinorFis :: !Bool
, _fis_minSupport :: !Support
, _fis_minSize :: !Int
} deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering
data RCParams = RCParams
{ _rc_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
{ _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-- | Parameters for Louvain clustering
data LouvainParams = LouvainParams
{ _louvain_proximity :: Proximity } deriving (Generic, Show, Eq, Read)
{ _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-------------------
......@@ -267,13 +267,13 @@ data Proximity = WeightedLogJaccard WLJParams
-- | Parameters for WeightedLogJaccard proximity
data WLJParams = WLJParams
{ _wlj_threshold :: Double
, _wlj_sensibility :: Double
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
} deriving (Generic, Show, Eq, Read)
-- | Parameters for Hamming proximity
data HammingParams = HammingParams
{ _hamming_threshold :: Double } deriving (Generic, Show, Eq, Read)
{ _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
----------------
......@@ -288,13 +288,13 @@ data Filter = LonelyBranch LBParams
-- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: Int
, _lb_periodsSup :: Int
, _lb_minNodes :: Int } deriving (Generic, Show, Eq)
{ _lb_periodsInf :: !Int
, _lb_periodsSup :: !Int
, _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams = SBParams
{ _sb_minSize :: Int } deriving (Generic, Show, Eq)
{ _sb_minSize :: !Int } deriving (Generic, Show, Eq)
----------------
......@@ -333,30 +333,30 @@ data Order = Asc | Desc deriving (Generic, Show, Read)
-- | A Phyloquery describes a phylomemic reconstruction
data PhyloQueryBuild = PhyloQueryBuild
{ _q_phyloTitle :: Text
, _q_phyloDesc :: Text
{ _q_phyloTitle :: !Text
, _q_phyloDesc :: !Text
-- Grain and Steps for the PhyloPeriods
, _q_periodGrain :: Int
, _q_periodSteps :: Int
, _q_periodGrain :: !Int
, _q_periodSteps :: !Int
-- Clustering method for building the contextual unit of Phylo (ie: level 1)
, _q_contextualUnit :: Cluster
, _q_contextualUnitMetrics :: [Metric]
, _q_contextualUnitFilters :: [Filter]
, _q_contextualUnit :: !Cluster
, _q_contextualUnitMetrics :: ![Metric]
, _q_contextualUnitFilters :: ![Filter]
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
, _q_interTemporalMatchingFrameTh :: Double
, _q_interTemporalMatching :: !Proximity
, _q_interTemporalMatchingFrame :: !Int
, _q_interTemporalMatchingFrameTh :: !Double
, _q_reBranchThr :: Double
, _q_reBranchNth :: Int
, _q_reBranchThr :: !Double
, _q_reBranchNth :: !Int
-- Last level of reconstruction
, _q_nthLevel :: Level
, _q_nthLevel :: !Level
-- Clustering method used from level 1 to nthLevel
, _q_nthCluster :: Cluster
, _q_nthCluster :: !Cluster
} deriving (Generic, Show, Eq)
-- | To choose the Phylo edge you want to export : --> <-- <--> <=>
......@@ -370,42 +370,42 @@ data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
-- | A PhyloView is the output type of a Phylo
data PhyloView = PhyloView
{ _pv_param :: PhyloParam
, _pv_title :: Text
, _pv_description :: Text
, _pv_filiation :: Filiation
, _pv_level :: Level
, _pv_periods :: [PhyloPeriodId]
, _pv_metrics :: Map Text [Double]
, _pv_branches :: [PhyloBranch]
, _pv_nodes :: [PhyloNode]
, _pv_edges :: [PhyloEdge]
{ _pv_param :: !PhyloParam
, _pv_title :: !Text
, _pv_description :: !Text
, _pv_filiation :: !Filiation
, _pv_level :: !Level
, _pv_periods :: ![PhyloPeriodId]
, _pv_metrics :: !(Map Text [Double])
, _pv_branches :: ![PhyloBranch]
, _pv_nodes :: ![PhyloNode]
, _pv_edges :: ![PhyloEdge]
} deriving (Generic, Show)
-- | A phyloview is made of PhyloBranches, edges and nodes
data PhyloBranch = PhyloBranch
{ _pb_id :: PhyloBranchId
, _pb_peak :: Text
, _pb_metrics :: Map Text [Double]
{ _pb_id :: !PhyloBranchId
, _pb_peak :: !Text
, _pb_metrics :: !(Map Text [Double])
} deriving (Generic, Show)
data PhyloEdge = PhyloEdge
{ _pe_source :: PhyloGroupId
, _pe_target :: PhyloGroupId
, _pe_type :: EdgeType
, _pe_weight :: Weight
{ _pe_source :: !PhyloGroupId
, _pe_target :: !PhyloGroupId
, _pe_type :: !EdgeType
, _pe_weight :: !Weight
} deriving (Generic, Show)
data PhyloNode = PhyloNode
{ _pn_id :: PhyloGroupId
, _pn_bid :: Maybe PhyloBranchId
, _pn_label :: Text
, _pn_idx :: [Int]
, _pn_ngrams :: Maybe [Ngrams]
, _pn_metrics :: Map Text [Double]
, _pn_cooc :: Map (Int,Int) Double
, _pn_parents :: Maybe [PhyloGroupId]
, _pn_childs :: [PhyloNode]
{ _pn_id :: !PhyloGroupId
, _pn_bid :: !(Maybe PhyloBranchId)
, _pn_label :: !Text
, _pn_idx :: ![Int]
, _pn_ngrams :: !(Maybe [Ngrams])
, _pn_metrics :: !(Map Text [Double])
, _pn_cooc :: !(Map (Int,Int) Double)
, _pn_parents :: !(Maybe [PhyloGroupId])
, _pn_childs :: ![PhyloNode]
} deriving (Generic, Show)
------------------------
......@@ -420,28 +420,28 @@ data DisplayMode = Flat | Nested
-- | A PhyloQueryView describes a Phylo as an output view
data PhyloQueryView = PhyloQueryView
{ _qv_lvl :: Level
{ _qv_lvl :: !Level
-- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
, _qv_filiation :: Filiation
, _qv_filiation :: !Filiation
-- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
, _qv_levelChilds :: Bool
, _qv_levelChildsDepth :: Level
, _qv_levelChilds :: !Bool
, _qv_levelChildsDepth :: !Level
-- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
-- Firstly the metrics, then the filters and the taggers
, _qv_metrics :: [Metric]
, _qv_filters :: [Filter]
, _qv_taggers :: [Tagger]
, _qv_metrics :: ![Metric]
, _qv_filters :: ![Filter]
, _qv_taggers :: ![Tagger]
-- An asc or desc sort to apply to the PhyloGraph
, _qv_sort :: Maybe (Sort,Order)
, _qv_sort :: !(Maybe (Sort,Order))
-- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
, _qv_export :: ExportMode
, _qv_display :: DisplayMode
, _qv_verbose :: Bool
, _qv_export :: !ExportMode
, _qv_display :: !DisplayMode
, _qv_verbose :: !Bool
}
......
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.API
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
......@@ -98,13 +99,13 @@ getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = maybe 2 identity l
branc = maybe 2 identity msb
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ maybe phyloFromQuery identity maybePhylo
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
......@@ -112,16 +113,16 @@ type PostPhylo = QueryParam "listId" ListId
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo n userId _lId = do
postPhylo corpusId userId _lId = do
-- TODO get Reader settings
-- s <- ask
let
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo n
pId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just n) userId]
pure $ NodeId (fromIntegral pId)
phy <- flowPhylo corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
......@@ -136,64 +137,25 @@ putPhylo = undefined
-- | Instances
instance Arbitrary PhyloView
where
arbitrary = elements [phyloView]
-- | TODO add phyloGroup ex
instance Arbitrary PhyloGroup
where
arbitrary = elements []
instance Arbitrary Phylo
where
arbitrary = elements [phylo]
instance ToSchema Order
instance ToParamSchema Order
instance FromHttpApiData Order
where
parseUrlPiece = readTextData
instance ToParamSchema Metric
instance FromHttpApiData [Metric]
where
parseUrlPiece = readTextData
instance FromHttpApiData Metric
where
parseUrlPiece = readTextData
instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
instance FromHttpApiData Metric where parseUrlPiece = readTextData
instance FromHttpApiData Order where parseUrlPiece = readTextData
instance FromHttpApiData Sort where parseUrlPiece = readTextData
instance FromHttpApiData Tagger where parseUrlPiece = readTextData
instance FromHttpApiData [Metric] where parseUrlPiece = readTextData
instance FromHttpApiData [Tagger] where parseUrlPiece = readTextData
instance ToParamSchema DisplayMode
instance FromHttpApiData DisplayMode
where
parseUrlPiece = readTextData
instance ToParamSchema ExportMode
instance FromHttpApiData ExportMode
where
parseUrlPiece = readTextData
instance FromHttpApiData Sort
where
parseUrlPiece = readTextData
instance ToParamSchema Sort
instance FromHttpApiData [Tagger]
where
parseUrlPiece = readTextData
instance FromHttpApiData Tagger
where
parseUrlPiece = readTextData
instance ToParamSchema Tagger
instance FromHttpApiData Filiation
where
parseUrlPiece = readTextData
instance ToParamSchema Filiation
instance ToParamSchema Tagger
instance ToParamSchema Metric
instance ToParamSchema Order
instance ToParamSchema Sort
instance ToSchema Order
......@@ -44,7 +44,6 @@ import Numeric.Statistics (percentile)
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
......@@ -105,11 +104,15 @@ instance PhyloLevelMaker Document
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
in over phylo_periodLevels
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
in trace (show (length groups)
<> " groups for "
<> show (pId) )
$ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period
) p
----------------------
......@@ -118,7 +121,14 @@ addPhyloLevel' lvl m p = alterPhyloPeriods
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
cliqueToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloFis
-> Map Date (Map (Int,Int) Double)
-> Vector Ngrams
-> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
......@@ -142,10 +152,17 @@ cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloCluster
-> Map (Date,Date) [PhyloCluster]
-> Phylo
-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
......@@ -154,12 +171,14 @@ clusterToGroup prd lvl idx lbl groups _m p =
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
cooc = getMiniCooc (listToFullCombi ngrams)
(periodsToYears [prd] )
(getPhyloCooc p )
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -195,9 +214,13 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
--------------------------------------
phyloBase = tracePhyloBase
$ toPhyloBase q init c termList fis
where
init = initPhyloParam (Just defaultPhyloVersion)
(Just defaultSoftware )
(Just q )
---------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
......@@ -205,17 +228,16 @@ toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(clusters) p
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) (clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
......
......@@ -35,12 +35,14 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env err m
......@@ -48,14 +50,14 @@ flowPhylo :: FlowCmdM env err m
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
list <- defaultList cId
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms MapTerm
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
......@@ -65,10 +67,13 @@ flowPhylo cId = do
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
termsInText pats txt = List.nub
$ List.concat
$ map (map Text.unwords)
$ extractTermsWithList pats txt
--------------------------------------
docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
......@@ -76,9 +81,9 @@ flowPhylo cId = do
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
......
......@@ -142,6 +142,7 @@ groupToDotNode fdt g bId =
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
])
......@@ -192,7 +193,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
{-
......@@ -201,7 +202,7 @@ exportToDot phylo export =
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank]
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
......@@ -368,8 +369,8 @@ inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
ngramsMetrics :: PhyloExport -> PhyloExport
ngramsMetrics export =
ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
......@@ -378,6 +379,8 @@ ngramsMetrics export =
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
......@@ -397,9 +400,9 @@ branchDating export =
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: PhyloExport -> PhyloExport
processMetrics export = ngramsMetrics
$ branchDating export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
-----------------
......@@ -598,8 +601,10 @@ toHorizon phylo =
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
childs = getPreviousChildIds level frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
......@@ -630,7 +635,7 @@ toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
......
......@@ -37,6 +37,17 @@ import qualified Data.Set as Set
-- | To Phylo | --
------------------
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_phylo1 :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> Config -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
......@@ -48,9 +59,11 @@ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFrom
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
-- > AD to db here
--------------------------------------
phyloBase :: Phylo
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
-- > AD to db here
--------------------------------------
......@@ -202,7 +215,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.01 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......@@ -232,9 +245,9 @@ docsToTimeScaleCooc docs fdt =
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
......@@ -245,7 +258,7 @@ groupDocsByPeriodRec f prds docs acc =
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
......@@ -262,7 +275,7 @@ groupDocsByPeriod' f pds docs =
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
......
......@@ -19,6 +19,8 @@ import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithK
import Data.String (String)
import Data.Text (Text)
import Prelude (floor)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Text.Printf
......@@ -56,6 +58,22 @@ printIOComment cmt =
-- | Misc | --
--------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment