Commit df4f16a2 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] SocialLists clean before connection

parent 9bb32e37
Pipeline #1208 failed with stage
......@@ -30,10 +30,11 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw)
import Gargantext.Core.Text.List.Social.Group (FlowListScores)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
......@@ -95,7 +96,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-- >8 >8 >8 >8 >8 >8 >8
let
grouped = toGroupedText (GroupedTextParams groupIt (Set.size . snd) fst snd)
grouped = toGroupedText (GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-} ) -- socialLists'
$ Map.toList
$ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b))
$ ngs
......@@ -144,8 +145,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms)
-- printDebug "\n * socialLists * \n" socialLists
printDebug "\n * socialLists * \n" socialLists
let
_socialStop = fromMaybe Set.empty $ Map.lookup StopTerm socialLists
_socialMap = fromMaybe Set.empty $ Map.lookup MapTerm socialLists
......@@ -157,7 +156,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "stopTerms" stopTerms
-- Grouping the ngrams and keeping the maximum score for label
let grouped = toGroupedText (GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty)) allTerms
let grouped = toGroupedText ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped
......@@ -167,7 +166,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
let
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if to small
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if too small
monoSize = 0.4 :: Double
multSize = 1 - monoSize
......@@ -202,12 +201,13 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k
in case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
Nothing -> mapGroups'
Just ns -> Map.insert k' ( g { _gt_nodes = Set.union ns (_gt_nodes g)}) mapGroups'
contextsAdded = foldl' (\mapGroups' k ->
let k' = ngramsGroup groupParams k in
case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
Nothing -> mapGroups'
Just ns -> Map.insert k' ( g { _gt_nodes = Set.union ns (_gt_nodes g)}) mapGroups'
)
mapGroups
$ Map.keys mapTextDocIds
......@@ -217,6 +217,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
]
where
mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded
......
{-|
Module : Gargantext.Core.Text.Group
Module : Gargantext.Core.Text.List.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -15,19 +15,20 @@ Portability : POSIX
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.Group
module Gargantext.Core.Text.List.Group
where
import Control.Lens (makeLenses, set, (^.))
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Data.Semigroup (Semigroup, (<>))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Group (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..), flc_lists, flc_parents, keyWithMaxValue)
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -77,20 +78,13 @@ ngramsGroup (GroupParams l _m _n _) =
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
{-
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
-}
------------------------------------------------------------------------
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
}
makeLenses 'GroupedTextParams
......@@ -100,7 +94,7 @@ toGroupedText :: Ord b
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText gparams from =
Map.fromListWith grouping $ map group from
Map.fromListWith union $ map group from
where
group (t,d) = let t' = (gparams ^. gt_fun_stem) t
in (t', GroupedText
......@@ -113,35 +107,34 @@ toGroupedText gparams from =
((gparams ^. gt_fun_nodeIds) d)
)
grouping :: Ord a
=> GroupedText a
-> GroupedText a
-> GroupedText a
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------
toGroupedText_FlowListScores :: ( FlowList a b
, Ord a
)
=> [a]
-> Map Text FlowListScores
-> Map Text (GroupedText b)
toGroupedText_FlowListScores = undefined
toGroupedText_FlowListScores' :: ( FlowList a b, Ord b)
=> Map Text c
-> Map Text FlowListScores
-> ( [a]
, Map Text (GroupedText b)
)
toGroupedText_FlowListScores' ms' scores = foldl' fun_group start ms
------------------------------------------------------------------------
toGroupedText' :: ( FlowList a b
, Ord b
)
=> GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text a
-> Map Stem (GroupedText b)
toGroupedText' groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
groupWithStem :: ( FlowList a b
, Ord b
)
=> GroupedTextParams a b
-> ([a], Map Text (GroupedText b))
-> Map Stem (GroupedText b)
groupWithStem _ = snd -- TODO
groupWithScores :: (FlowList a b, Ord b)
=> Map Text FlowListScores
-> Map Text c
-> ([a], Map Text (GroupedText b))
groupWithScores scores ms' = foldl' fun_group start ms
where
start = ([], Map.empty)
ms = map selfParent (Map.toList ms')
......@@ -152,7 +145,7 @@ toGroupedText_FlowListScores' ms' scores = foldl' fun_group start ms
case keyWithMaxValue $ scores' ^. flc_parents of
Nothing -> (left, Map.alter (updateWith scores' current) (hasNgrams current) grouped)
Just parent -> fun_group (left, grouped) (withParent ms' parent current)
Nothing -> (current : left, grouped)
Nothing -> (current : left, grouped)
updateWith scores current Nothing = Just $ createGroupWith scores current
updateWith scores current (Just x) = Just $ updateGroupWith scores current x
......@@ -170,6 +163,32 @@ class HasGroup a b | a -> b where
class WithParent a where
selfParent :: (Text, c) -> a
withParent :: Map Text c -> Text -> a -> a
union :: a -> a -> a
------------------------------------------------------------------------
instance Ord a => WithParent (GroupedText a) where
union (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
{-
selfParent (t,d) = let t' = (gparams ^. gt_fun_stem) t
in (t', GroupedText
Nothing
t
((gparams ^. gt_fun_score) d)
((gparams ^. gt_fun_texts) d)
(size t)
t'
((gparams ^. gt_fun_nodeIds) d)
)
-}
------------------------------------------------------------------------
type Stem = Text
......@@ -196,7 +215,7 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- Lenses Instances
-- | Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------
......
......@@ -20,7 +20,7 @@ import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Find
import Gargantext.Core.Text.List.Social.ListType
import Gargantext.Core.Text.List.Social.Group
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
......@@ -115,12 +115,12 @@ flowSocialListByMode' user nt st mode =
flowSocialListByModeWith :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> NgramsType -> Set Text -> [NodeId]
-> m (Map Text FlowListScores)
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> NgramsType -> Set Text -> [NodeId]
-> m (Map Text FlowListScores)
flowSocialListByModeWith nt st ns =
mapM (\l -> getListNgrams [l] nt) ns
>>= pure
......
{-|
Module : Gargantext.Core.Text.List.Social.Group
Module : Gargantext.Core.Text.List.Social.Scores
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
......@@ -15,7 +15,7 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Group
module Gargantext.Core.Text.List.Social.Scores
where
import Control.Lens
......
......@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.List.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment