Commit df4f16a2 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] SocialLists clean before connection

parent 9bb32e37
...@@ -30,10 +30,11 @@ import qualified Data.Text as Text ...@@ -30,10 +30,11 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList) import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM) import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw)
import Gargantext.Core.Text.List.Social.Group (FlowListScores) import Gargantext.Core.Text.List.Social.Scores (FlowListScores)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
...@@ -95,7 +96,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do ...@@ -95,7 +96,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-- >8 >8 >8 >8 >8 >8 >8 -- >8 >8 >8 >8 >8 >8 >8
let let
grouped = toGroupedText (GroupedTextParams groupIt (Set.size . snd) fst snd) grouped = toGroupedText (GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-} ) -- socialLists'
$ Map.toList $ Map.toList
$ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b))
$ ngs $ ngs
...@@ -144,8 +145,6 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -144,8 +145,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms) socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms)
-- printDebug "\n * socialLists * \n" socialLists -- printDebug "\n * socialLists * \n" socialLists
printDebug "\n * socialLists * \n" socialLists
let let
_socialStop = fromMaybe Set.empty $ Map.lookup StopTerm socialLists _socialStop = fromMaybe Set.empty $ Map.lookup StopTerm socialLists
_socialMap = fromMaybe Set.empty $ Map.lookup MapTerm socialLists _socialMap = fromMaybe Set.empty $ Map.lookup MapTerm socialLists
...@@ -157,7 +156,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -157,7 +156,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "stopTerms" stopTerms -- printDebug "stopTerms" stopTerms
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = toGroupedText (GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty)) allTerms let grouped = toGroupedText ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
...@@ -167,7 +166,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -167,7 +166,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * stopTerms * \n" stopTerms -- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates -- splitting monterms and multiterms to take proportional candidates
let let
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if to small listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if too small
monoSize = 0.4 :: Double monoSize = 0.4 :: Double
multSize = 1 - monoSize multSize = 1 - monoSize
...@@ -202,8 +201,9 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -202,8 +201,9 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ groupedMonoHead <> groupedMultHead $ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId -- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k contextsAdded = foldl' (\mapGroups' k ->
in case Map.lookup k' mapGroups' of let k' = ngramsGroup groupParams k in
case Map.lookup k' mapGroups' of
Nothing -> mapGroups' Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of Just g -> case Map.lookup k mapTextDocIds of
Nothing -> mapGroups' Nothing -> mapGroups'
...@@ -217,6 +217,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -217,6 +217,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2) $ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds | (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds , (t2, s2) <- mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
] ]
where where
mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded
......
{-| {-|
Module : Gargantext.Core.Text.Group Module : Gargantext.Core.Text.List.Group
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,19 +15,20 @@ Portability : POSIX ...@@ -15,19 +15,20 @@ Portability : POSIX
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.Group module Gargantext.Core.Text.List.Group
where where
import Control.Lens (makeLenses, set, (^.)) import Control.Lens (makeLenses, set, (^.))
import Data.Set (Set) import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Semigroup (Semigroup, (<>))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..)) -- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue) import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -77,20 +78,13 @@ ngramsGroup (GroupParams l _m _n _) = ...@@ -77,20 +78,13 @@ ngramsGroup (GroupParams l _m _n _) =
. Text.splitOn " " . Text.splitOn " "
. Text.replace "-" " " . Text.replace "-" " "
------------------------------------------------------------------------
{-
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
data GroupedTextParams a b = data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b , _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text , _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId , _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
} }
makeLenses 'GroupedTextParams makeLenses 'GroupedTextParams
...@@ -100,7 +94,7 @@ toGroupedText :: Ord b ...@@ -100,7 +94,7 @@ toGroupedText :: Ord b
-> [(Text,a)] -> [(Text,a)]
-> Map Stem (GroupedText b) -> Map Stem (GroupedText b)
toGroupedText gparams from = toGroupedText gparams from =
Map.fromListWith grouping $ map group from Map.fromListWith union $ map group from
where where
group (t,d) = let t' = (gparams ^. gt_fun_stem) t group (t,d) = let t' = (gparams ^. gt_fun_stem) t
in (t', GroupedText in (t', GroupedText
...@@ -113,35 +107,34 @@ toGroupedText gparams from = ...@@ -113,35 +107,34 @@ toGroupedText gparams from =
((gparams ^. gt_fun_nodeIds) d) ((gparams ^. gt_fun_nodeIds) d)
) )
grouping :: Ord a
=> GroupedText a
-> GroupedText a
-> GroupedText a
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------ ------------------------------------------------------------------------
toGroupedText_FlowListScores :: ( FlowList a b ------------------------------------------------------------------------
, Ord a
toGroupedText' :: ( FlowList a b
, Ord b
) )
=> [a] => GroupedTextParams a b
-> Map Text FlowListScores -> Map Text FlowListScores
-> Map Text (GroupedText b) -> Map Text a
toGroupedText_FlowListScores = undefined -> Map Stem (GroupedText b)
toGroupedText' groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
toGroupedText_FlowListScores' :: ( FlowList a b, Ord b)
=> Map Text c groupWithStem :: ( FlowList a b
-> Map Text FlowListScores , Ord b
-> ( [a]
, Map Text (GroupedText b)
) )
toGroupedText_FlowListScores' ms' scores = foldl' fun_group start ms => GroupedTextParams a b
-> ([a], Map Text (GroupedText b))
-> Map Stem (GroupedText b)
groupWithStem _ = snd -- TODO
groupWithScores :: (FlowList a b, Ord b)
=> Map Text FlowListScores
-> Map Text c
-> ([a], Map Text (GroupedText b))
groupWithScores scores ms' = foldl' fun_group start ms
where where
start = ([], Map.empty) start = ([], Map.empty)
ms = map selfParent (Map.toList ms') ms = map selfParent (Map.toList ms')
...@@ -170,6 +163,32 @@ class HasGroup a b | a -> b where ...@@ -170,6 +163,32 @@ class HasGroup a b | a -> b where
class WithParent a where class WithParent a where
selfParent :: (Text, c) -> a selfParent :: (Text, c) -> a
withParent :: Map Text c -> Text -> a -> a withParent :: Map Text c -> Text -> a -> a
union :: a -> a -> a
------------------------------------------------------------------------
instance Ord a => WithParent (GroupedText a) where
union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
{-
selfParent (t,d) = let t' = (gparams ^. gt_fun_stem) t
in (t', GroupedText
Nothing
t
((gparams ^. gt_fun_score) d)
((gparams ^. gt_fun_texts) d)
(size t)
t'
((gparams ^. gt_fun_nodeIds) d)
)
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Stem = Text type Stem = Text
...@@ -196,7 +215,7 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where ...@@ -196,7 +215,7 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _) compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- Lenses Instances -- | Lenses Instances
makeLenses 'GroupedText makeLenses 'GroupedText
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -20,7 +20,7 @@ import Gargantext.API.Ngrams.Tools -- (getListNgrams) ...@@ -20,7 +20,7 @@ import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Find import Gargantext.Core.Text.List.Social.Find
import Gargantext.Core.Text.List.Social.ListType import Gargantext.Core.Text.List.Social.ListType
import Gargantext.Core.Text.List.Social.Group import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
......
{-| {-|
Module : Gargantext.Core.Text.List.Social.Group Module : Gargantext.Core.Text.List.Social.Scores
Description : Description :
Copyright : (c) CNRS, 2018-Present Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,7 +15,7 @@ Portability : POSIX ...@@ -15,7 +15,7 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Group module Gargantext.Core.Text.List.Social.Scores
where where
import Control.Lens import Control.Lens
......
...@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types ...@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..)) import Gargantext.Core.Text.List.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment