Commit 4848a3bf authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] SocialList (temp reorg)

parent 6eb6b6cd
...@@ -34,6 +34,7 @@ import Gargantext.Core.Text (size) ...@@ -34,6 +34,7 @@ import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores) import Gargantext.Core.Text.List.Social.Scores (FlowListScores)
import Gargantext.Core.Text.List.Group import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
......
...@@ -30,6 +30,8 @@ import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId) ...@@ -30,6 +30,8 @@ import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..)) -- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue) import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
...@@ -38,148 +40,8 @@ import qualified Data.Map as Map ...@@ -38,148 +40,8 @@ import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text import qualified Data.Text as Text
{-
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
}
makeLenses 'GroupedTextParams
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
-> Map Text a
-> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
where
group gparams' (t,d) = let t' = (gparams' ^. gt_fun_stem) t
in (t', GroupedText
Nothing
t
((gparams' ^. gt_fun_score) d)
((gparams' ^. gt_fun_texts) d)
(size t)
t'
((gparams' ^. gt_fun_nodeIds) d)
)
------------------------------------------------------------------------
------------------------------------------------------------------------
type Stem = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Text
, _gt_score :: !score
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem -- needed ?
, _gt_nodes :: !(Set NodeId)
} {-deriving Show--}
--{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
--}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- | Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance WithParent (Text, Set NodeId) (Text, Set NodeId) where toGroupedText :: GroupedTextParams a b
selfParent (t, (_,n)) = (t, n)
instance HasNgrams (Text, Set NodeId) where
hasNgrams (t, _) = t
-- instance HasGroupWithScores (Text, Set NodeId) Int where
createGroupWithScores' fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
label
(Set.size ns)
children
(size t)
t
ns
where
(label, children) = case keyWithMaxValue $ fs ^. flc_parents of
Nothing -> (t, Set.empty)
Just t' -> (t', Set.singleton t)
updateGroupWithScores' :: FlowListScores
-> (a, Set NodeId) -> GroupedText score -> GroupedText score
updateGroupWithScores' fs (t, ns) g =
set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
$ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
withParent' :: FlowListScores
-> Map Text (Set NodeId)
-> Text
-> (Text, Set NodeId)
-> (Text, Set NodeId)
withParent' fs m t a = undefined
------------------------------------------------------------------------
toGroupedText :: {-( FlowList c a b
Ord b
)
=> -} GroupedTextParams a b
-> Map Text FlowListScores -> Map Text FlowListScores
-> Map Text (Set NodeId) -> Map Text (Set NodeId)
-> Map Stem (GroupedText Int) -> Map Stem (GroupedText Int)
...@@ -187,97 +49,7 @@ toGroupedText groupParams scores = ...@@ -187,97 +49,7 @@ toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores) (groupWithStem groupParams) . (groupWithScores scores)
groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b
, Semigroup a
, Ord b
)
=> -} GroupedTextParams a b
-> ([(Text, Set NodeId)], Map Text (GroupedText Int))
-> Map Stem (GroupedText Int)
groupWithStem _ = snd -- TODO (just for tests on Others Ngrams which do not need stem)
groupWithScores :: {- Ord b -- (FlowList c a b, Ord b)
=> -} Map Text FlowListScores
-> Map Text (Set NodeId)
-> ([(Text, Set NodeId)], Map Text (GroupedText Int))
groupWithScores scores ms' = foldl' fun_group start ms
where
start = ([], Map.empty)
ms = map identity (Map.toList ms')
fun_group :: ([(Text, Set NodeId)], Map Text (GroupedText Int)) -> (Text, Set NodeId)
-> ([(Text, Set NodeId)], Map Text (GroupedText Int))
fun_group (left, grouped) current =
case Map.lookup (fst current) scores of
Just scores' ->
case keyWithMaxValue $ scores' ^. flc_parents of
Nothing -> (left, Map.alter (updateWith scores' current) (fst current) grouped)
Just parent -> fun_group (left, grouped) (withParent' scores' ms' parent current)
Nothing -> (current : left, grouped)
updateWith scores current Nothing = Just $ createGroupWithScores' scores current
updateWith scores current (Just x) = Just $ updateGroupWithScores' scores current x
-------
groupWithScores' :: Map Text GroupedWithListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedText Int)
groupWithScores' scores ms = foldl' (fun_group scores) start (Map.toList ms)
where
start = ([], Map.empty)
fun_group :: Map Text FlowListScores
-> ([(Text, GroupedText Int)], Map Text (GroupedText Int))
-> (Text, GroupedText Int)
-> ([(Text, GroupedText Int)], Map Text (GroupedText Int))
fun_group = undefined
------------------------------------------------------------------------ ------------------------------------------------------------------------
type FlowList c a b = (HasNgrams a, HasGroupWithScores a b, WithParent c a, Semigroup a)
class HasNgrams a where
hasNgrams :: a -> Text
class HasGroup a b | a -> b where
hasGroup :: a -> GroupedText b
class HasGroupWithStem a b where
hasGroupWithStem :: GroupedTextParams a b -> Map Text a -> Map Stem (GroupedText b)
class HasGroupWithScores a b | a -> b where
createGroupWithScores :: FlowListScores -> a -> GroupedText b
updateGroupWithScores :: FlowListScores -> a -> GroupedText b -> GroupedText b
class WithParent c a | c -> a where
selfParent :: (Text, c) -> a
withParent :: Map Text c -> Text -> a -> a
------------------------------------------------------------------------
instance Ord a => Semigroup (GroupedText a) where
(<>) (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------
-- to remove
-- | These instances seeems useless, just for debug purpose
instance HasNgrams (Set Text, Set NodeId) where
hasNgrams = fromMaybe "Nothing" . head . Set.elems . fst
instance HasGroupWithScores (Set Text, Set NodeId) Int where
createGroupWithScores = undefined
updateGroupWithScores = undefined
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | To be removed -- | To be removed
addListType :: Map Text ListType -> GroupedText a -> GroupedText a addListType :: Map Text ListType -> GroupedText a -> GroupedText a
......
{-|
Module : Gargantext.Core.Text.List.WithScores
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List.Group.WithScores
where
import Data.Maybe (fromMaybe)
import Control.Lens (makeLenses, set, (^.), (%~))
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Data.Semigroup (Semigroup, (<>))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
data GroupedWithListScores =
GroupedWithListScores { _gwls_children :: !(Set Text)
, _gwls_listType :: !(Maybe ListType)
}
makeLenses ''GroupedWithListScores
toGroupedWithListScores :: Map Text FlowListScores -> Map Text GroupedWithListScores
toGroupedWithListScores ms = foldl' (toGroup ms) Map.empty (Map.toList ms)
where
toGroup :: Map Text FlowListScores
-> Map Text GroupedWithListScores
-> (Text, FlowListScores)
-> Map Text GroupedWithListScores
toGroup ms' result (t,fs) = case (keyWithMaxValue $ fs ^. flc_parents) of
Nothing -> Map.alter (addGroupedParent (t,fs)) t result
Just parent -> Map.alter (addGroupedChild (t,fs)) parent result
addGroupedParent :: (Text, FlowListScores) -> Maybe GroupedWithListScores -> Maybe GroupedWithListScores
addGroupedParent (_,fs) Nothing = Just $ GroupedWithListScores Set.empty list
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedParent (t,fs) (Just g) = Just $ set gwls_listType list
$ (%~) gwls_children (Set.insert t) g
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedChild :: (Text, FlowListScores) -> Maybe GroupedWithListScores -> Maybe GroupedWithListScores
addGroupedChild (t,fs) Nothing = Just $ GroupedWithListScores (Set.singleton t) list
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedChild (t,fs) (Just g) = Just $ (%~) gwls_listType (<> list)
$ (%~) gwls_children (Set.insert t) g
where
list = keyWithMaxValue $ fs ^. flc_lists
data GroupedTextScores score =
GroupedTextScores { _gts_listType :: !(Maybe ListType)
, _gts_score :: score
, _gts_children :: !(Set Text)
}
makeLenses 'GroupedTextScores
groupWithScores :: Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
groupWithScores scores =
Map.mapWithKey (\k a -> scoresToGroupedTextScores
(Map.lookup k $ toGroupedWithListScores scores)
k a
)
scoresToGroupedTextScores :: Maybe GroupedWithListScores
-> Text -> Set NodeId
-> GroupedTextScores (Set NodeId)
scoresToGroupedTextScores Nothing t ns = undefined
scoresToGroupedTextScores (Just g) t ns = undefined
{-|
Module : Gargantext.Core.Text.List.Group.WithStem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group.WithStem
where
import Data.Maybe (fromMaybe)
import Control.Lens (makeLenses, set, (^.))
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Data.Semigroup (Semigroup, (<>))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
{-
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
}
makeLenses 'GroupedTextParams
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
-> Map Text a
-> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
where
group gparams' (t,d) = let t' = (gparams' ^. gt_fun_stem) t
in (t', GroupedText
Nothing
t
((gparams' ^. gt_fun_score) d)
((gparams' ^. gt_fun_texts) d)
(size t)
t'
((gparams' ^. gt_fun_nodeIds) d)
)
------------------------------------------------------------------------
type Stem = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Text
, _gt_score :: !score
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem -- needed ?
, _gt_nodes :: !(Set NodeId)
} {-deriving Show--}
--{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
--}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
instance Ord a => Semigroup (GroupedText a) where
(<>) (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
-- | Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------
groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b
, Semigroup a
, Ord b
)
=> -} GroupedTextParams a b
-> Map Text (GroupedTextScores (Set NodeId))
-> Map Stem (GroupedText Int)
groupWithStem _ = undefined -- TODO (just for tests on Others Ngrams which do not need stem)
------------------------------------------------------------------------
...@@ -82,45 +82,6 @@ instance Semigroup FlowListScores where ...@@ -82,45 +82,6 @@ instance Semigroup FlowListScores where
------------------------------------------------------------------------ ------------------------------------------------------------------------
data GroupedWithListScores =
GroupedWithListScores { _gwls_children :: !(Set Text)
, _gwls_listType :: !(Maybe ListType)
}
makeLenses ''GroupedWithListScores
toGroupedWithListScores :: Map Text FlowListScores -> Map Text GroupedWithListScores
toGroupedWithListScores ms = foldl' (toGroup ms) Map.empty (Map.toList ms)
where
toGroup :: Map Text FlowListScores
-> Map Text GroupedWithListScores
-> (Text, FlowListScores)
-> Map Text GroupedWithListScores
toGroup ms' result (t,fs) = case (keyWithMaxValue $ fs ^. flc_parents) of
Nothing -> Map.alter (addGroupedParent (t,fs)) t result
Just parent -> Map.alter (addGroupedChild (t,fs)) parent result
addGroupedParent :: (Text, FlowListScores) -> Maybe GroupedWithListScores -> Maybe GroupedWithListScores
addGroupedParent (_,fs) Nothing = Just $ GroupedWithListScores Set.empty list
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedParent (t,fs) (Just g) = Just $ set gwls_listType list
$ (%~) gwls_children (Set.insert t) g
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedChild :: (Text, FlowListScores) -> Maybe GroupedWithListScores -> Maybe GroupedWithListScores
addGroupedChild (t,fs) Nothing = Just $ GroupedWithListScores (Set.singleton t) list
where
list = keyWithMaxValue $ fs ^. flc_lists
addGroupedChild (t,fs) (Just g) = Just $ (%~) gwls_listType (<> list)
$ (%~) gwls_children (Set.insert t) g
where
list = keyWithMaxValue $ fs ^. flc_lists
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | toFlowListScores which generate Score from list of Map Text -- | toFlowListScores which generate Score from list of Map Text
-- NgramsRepoElement -- NgramsRepoElement
......
...@@ -64,9 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName) ...@@ -64,9 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile) import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment