Commit bee4a824 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Social lists, connection (WIP)

parent df4f16a2
Pipeline #1209 failed with stage
...@@ -89,6 +89,10 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do ...@@ -89,6 +89,10 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
<- flowSocialList' MySelfFirst user nt (Set.fromList $ Map.keys ngs') <- flowSocialList' MySelfFirst user nt (Set.fromList $ Map.keys ngs')
-- PrivateFirst for first developments since Public NodeMode is not implemented yet -- PrivateFirst for first developments since Public NodeMode is not implemented yet
let
groupParams = GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-}
grouped' = toGroupedText groupParams socialLists' ngs'
-- 8< 8< 8< 8< 8< 8< 8< -- 8< 8< 8< 8< 8< 8< 8<
let let
ngs :: Map Text (Set Text, Set NodeId) = groupNodesByNgramsWith groupIt ngs' ngs :: Map Text (Set Text, Set NodeId) = groupNodesByNgramsWith groupIt ngs'
...@@ -96,8 +100,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do ...@@ -96,8 +100,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-- >8 >8 >8 >8 >8 >8 >8 -- >8 >8 >8 >8 >8 >8 >8
let let
grouped = toGroupedText (GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-} ) -- socialLists' grouped = groupedTextWithStem (GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-} ) -- socialLists'
$ Map.toList
$ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b))
$ ngs $ ngs
...@@ -136,27 +139,17 @@ buildNgramsTermsList :: ( HasNodeError err ...@@ -136,27 +139,17 @@ buildNgramsTermsList :: ( HasNodeError err
buildNgramsTermsList user uCid mCid groupParams = do buildNgramsTermsList user uCid mCid groupParams = do
-- Computing global speGen score -- Computing global speGen score
allTerms :: [(Text, Double)] <- Map.toList <$> getTficf uCid mCid NgramsTerms allTerms :: Map Text Double <- getTficf uCid mCid NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms) -- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms) -- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms -- First remove stops terms
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms) socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst $ Map.toList allTerms)
-- printDebug "\n * socialLists * \n" socialLists -- printDebug "\n * socialLists * \n" socialLists
let
_socialStop = fromMaybe Set.empty $ Map.lookup StopTerm socialLists
_socialMap = fromMaybe Set.empty $ Map.lookup MapTerm socialLists
_socialCand = fromMaybe Set.empty $ Map.lookup CandidateTerm socialLists
-- stopTerms ignored for now (need to be tagged already)
-- (stopTerms, candidateTerms) = List.partition ((\t -> Set.member t socialStop) . fst) allTerms
-- (mapTerms, candidateTerms) = List.partition ((\t -> Set.member t socialMap ) . fst) allTerms
-- printDebug "stopTerms" stopTerms
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = toGroupedText ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms let grouped = groupedTextWithStem ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
......
...@@ -18,6 +18,7 @@ Portability : POSIX ...@@ -18,6 +18,7 @@ Portability : POSIX
module Gargantext.Core.Text.List.Group module Gargantext.Core.Text.List.Group
where where
import Data.Maybe (fromMaybe)
import Control.Lens (makeLenses, set, (^.)) import Control.Lens (makeLenses, set, (^.))
import Data.Set (Set) import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
...@@ -30,6 +31,7 @@ import Gargantext.Database.Admin.Types.Node (NodeId) ...@@ -30,6 +31,7 @@ import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..)) -- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue) import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -89,35 +91,33 @@ data GroupedTextParams a b = ...@@ -89,35 +91,33 @@ data GroupedTextParams a b =
makeLenses 'GroupedTextParams makeLenses 'GroupedTextParams
toGroupedText :: Ord b groupedTextWithStem :: Ord b
=> GroupedTextParams a b => GroupedTextParams a b
-> [(Text,a)] -> Map Text a
-> Map Stem (GroupedText b) -> Map Stem (GroupedText b)
toGroupedText gparams from = groupedTextWithStem gparams from =
Map.fromListWith union $ map group from Map.fromListWith union $ map (group gparams) $ Map.toList from
where where
group (t,d) = let t' = (gparams ^. gt_fun_stem) t group gparams' (t,d) = let t' = (gparams' ^. gt_fun_stem) t
in (t', GroupedText in (t', GroupedText
Nothing Nothing
t t
((gparams ^. gt_fun_score) d) ((gparams' ^. gt_fun_score) d)
((gparams ^. gt_fun_texts) d) ((gparams' ^. gt_fun_texts) d)
(size t) (size t)
t' t'
((gparams ^. gt_fun_nodeIds) d) ((gparams' ^. gt_fun_nodeIds) d)
) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ toGroupedText :: ( FlowList a b
, Ord b
toGroupedText' :: ( FlowList a b )
, Ord b => GroupedTextParams a b
) -> Map Text FlowListScores
=> GroupedTextParams a b -> Map Text c
-> Map Text FlowListScores -> Map Stem (GroupedText b)
-> Map Text a toGroupedText groupParams scores =
-> Map Stem (GroupedText b)
toGroupedText' groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores) (groupWithStem groupParams) . (groupWithScores scores)
...@@ -127,7 +127,7 @@ groupWithStem :: ( FlowList a b ...@@ -127,7 +127,7 @@ groupWithStem :: ( FlowList a b
=> GroupedTextParams a b => GroupedTextParams a b
-> ([a], Map Text (GroupedText b)) -> ([a], Map Text (GroupedText b))
-> Map Stem (GroupedText b) -> Map Stem (GroupedText b)
groupWithStem _ = snd -- TODO groupWithStem _ = snd -- TODO (just for tests on Others Ngrams which do not need stem)
groupWithScores :: (FlowList a b, Ord b) groupWithScores :: (FlowList a b, Ord b)
...@@ -137,7 +137,7 @@ groupWithScores :: (FlowList a b, Ord b) ...@@ -137,7 +137,7 @@ groupWithScores :: (FlowList a b, Ord b)
groupWithScores scores ms' = foldl' fun_group start ms groupWithScores scores ms' = foldl' fun_group start ms
where where
start = ([], Map.empty) start = ([], Map.empty)
ms = map selfParent (Map.toList ms') ms = map selfParent $ Map.toList ms'
fun_group (left, grouped) current = fun_group (left, grouped) current =
case Map.lookup (hasNgrams current) scores of case Map.lookup (hasNgrams current) scores of
...@@ -147,23 +147,29 @@ groupWithScores scores ms' = foldl' fun_group start ms ...@@ -147,23 +147,29 @@ groupWithScores scores ms' = foldl' fun_group start ms
Just parent -> fun_group (left, grouped) (withParent ms' parent current) Just parent -> fun_group (left, grouped) (withParent ms' parent current)
Nothing -> (current : left, grouped) Nothing -> (current : left, grouped)
updateWith scores current Nothing = Just $ createGroupWith scores current updateWith scores current Nothing = Just $ createGroupWithScores scores current
updateWith scores current (Just x) = Just $ updateGroupWith scores current x updateWith scores current (Just x) = Just $ updateGroupWithScores scores current x
------------------------------------------------------------------------ ------------------------------------------------------------------------
type FlowList a b = (HasNgrams a, HasGroup a b, WithParent a) type FlowList a b = (HasNgrams a, HasGroupWithScores a b, WithParent a)
class HasNgrams a where class HasNgrams a where
hasNgrams :: a -> Text hasNgrams :: a -> Text
class HasGroup a b | a -> b where class HasGroup a b | a -> b where
createGroupWith :: FlowListScores -> a -> GroupedText b hasGroup :: a -> GroupedText b
updateGroupWith :: FlowListScores -> a -> GroupedText b -> GroupedText b
class HasGroupWithStem a b where
hasGroupWithStem :: GroupedTextParams a b -> Map Text a -> Map Stem (GroupedText b)
class HasGroupWithScores a b | a -> b where
createGroupWithScores :: FlowListScores -> a -> GroupedText b
updateGroupWithScores :: FlowListScores -> a -> GroupedText b -> GroupedText b
class WithParent a where class WithParent a where
selfParent :: (Text, c) -> a selfParent :: (Text, c) -> a
withParent :: Map Text c -> Text -> a -> a withParent :: Map Text c -> Text -> a -> a
union :: a -> a -> a union :: a -> a -> a
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Ord a => WithParent (GroupedText a) where instance Ord a => WithParent (GroupedText a) where
...@@ -176,26 +182,27 @@ instance Ord a => WithParent (GroupedText a) where ...@@ -176,26 +182,27 @@ instance Ord a => WithParent (GroupedText a) where
gr = Set.union group1 group2 gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2 nodes = Set.union nodes1 nodes2
{-
selfParent (t,d) = let t' = (gparams ^. gt_fun_stem) t
in (t', GroupedText
Nothing
t
((gparams ^. gt_fun_score) d)
((gparams ^. gt_fun_texts) d)
(size t)
t'
((gparams ^. gt_fun_nodeIds) d)
)
-}
------------------------------------------------------------------------
data GroupedTextOrigin a =
GroupedTextOrigin { _gto_lable :: !Text
, _gto_ngramsType :: !NgramsType
, _gto_score :: !a
, _gto_listType :: !(Maybe ListType)
, _gto_children :: !(Set Text)
, _gto_nodes :: !(Set NodeId)
}
data GroupedTextStem a =
GroupedTextStem { _gts_origin :: !(GroupedTextOrigin a)
, _gts_stem :: !Stem
}
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Stem = Text type Stem = Text
type Label = Text
data GroupedText score = data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType) GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label , _gt_label :: !Text
, _gt_score :: !score , _gt_score :: !score
, _gt_children :: !(Set Text) , _gt_children :: !(Set Text)
, _gt_size :: !Int , _gt_size :: !Int
...@@ -218,12 +225,25 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where ...@@ -218,12 +225,25 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
-- | Lenses Instances -- | Lenses Instances
makeLenses 'GroupedText makeLenses 'GroupedText
------------------------------------------------------------------------
-- to remove
-- | These instances seeems useless, just for debug purpose
instance HasNgrams (Set Text, Set NodeId) where
hasNgrams = fromMaybe "Nothing" . head . Set.elems . fst
instance HasGroupWithScores (Set Text, Set NodeId) Int where
createGroupWithScores = undefined
updateGroupWithScores = undefined
instance WithParent (Set Text, Set NodeId) where
union = undefined
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance HasNgrams (Text, Set NodeId) where instance HasNgrams (Text, Set NodeId) where
hasNgrams (t, _) = t hasNgrams (t, _) = t
instance HasGroup (Text, Set NodeId) Int where instance HasGroupWithScores (Text, Set NodeId) Int where
createGroupWith fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists) createGroupWithScores fs (t, ns) = GroupedText (keyWithMaxValue $ fs ^. flc_lists)
label label
(Set.size ns) (Set.size ns)
children children
...@@ -235,7 +255,7 @@ instance HasGroup (Text, Set NodeId) Int where ...@@ -235,7 +255,7 @@ instance HasGroup (Text, Set NodeId) Int where
Nothing -> (t, Set.empty) Nothing -> (t, Set.empty)
Just t' -> (t', Set.singleton t) Just t' -> (t', Set.singleton t)
updateGroupWith fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists) updateGroupWithScores fs (t, ns) g = set gt_listType (keyWithMaxValue $ fs ^. flc_lists)
$ set gt_nodes (Set.union ns $ g ^. gt_nodes) g $ set gt_nodes (Set.union ns $ g ^. gt_nodes) g
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment