Commit af817f81 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SocialList] refactored to be applied to others lists (needs Tree of grouped...

[SocialList] refactored to be applied to others lists (needs Tree of grouped terms next), not tested yet
parent 5c57aefc
...@@ -19,6 +19,7 @@ import Data.Set (Set) ...@@ -19,6 +19,7 @@ import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..)) import Gargantext.Core.Text.List.Learn (Model(..))
...@@ -30,6 +31,7 @@ import qualified Data.Map as Map ...@@ -30,6 +31,7 @@ import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text import qualified Data.Text as Text
data NgramsListBuilder = BuilderStepO { stemSize :: !Int data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int , stemX :: !Int
, stopSize :: !StopSize , stopSize :: !StopSize
...@@ -56,10 +58,12 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -56,10 +58,12 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_limit :: !Int , unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize , unGroupParams_stopSize :: !StopSize
} }
| GroupIdentity
ngramsGroup :: GroupParams ngramsGroup :: GroupParams
-> Text -> Text
-> Text -> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) = Text.intercalate " " ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. map (stem l) . map (stem l)
-- . take n -- . take n
...@@ -68,6 +72,42 @@ ngramsGroup (GroupParams l _m _n _) = Text.intercalate " " ...@@ -68,6 +72,42 @@ ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. Text.splitOn " " . Text.splitOn " "
. Text.replace "-" " " . Text.replace "-" " "
------------------------------------------------------------------------------
toGroupedText :: Ord b
=> (Text -> Text)
-> (a -> b)
-> (a -> Set Text)
-> (a -> Set NodeId)
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
where
group (t,d) = let t' = fun_stem t
in (t', GroupedText
Nothing
t
(fun_score d)
(fun_texts d)
(size t)
t'
(fun_nodeIds d)
)
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text type Stem = Text
......
...@@ -29,7 +29,6 @@ import qualified Data.Text as Text ...@@ -29,7 +29,6 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList) import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM) import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group import Gargantext.Core.Text.Group
...@@ -58,10 +57,11 @@ buildNgramsLists :: ( RepoCmdM env err m ...@@ -58,10 +57,11 @@ buildNgramsLists :: ( RepoCmdM env err m
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid identity) othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity))
[Authors, Sources, Institutes] [(Authors, 5), (Sources, 7), (Institutes, 9)]
pure $ Map.unions $ othersTerms <> [ngTerms] pure $ Map.unions $ othersTerms <> [ngTerms]
type MapListSize = Int
buildNgramsOthersList ::( HasNodeError err buildNgramsOthersList ::( HasNodeError err
, CmdM env err m , CmdM env err m
...@@ -71,21 +71,34 @@ buildNgramsOthersList ::( HasNodeError err ...@@ -71,21 +71,34 @@ buildNgramsOthersList ::( HasNodeError err
=> User => User
-> UserCorpusId -> UserCorpusId
-> (Text -> Text) -> (Text -> Text)
-> NgramsType -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsOthersList _user uCid groupIt nt = do buildNgramsOthersList user uCid groupIt (nt, mapListSize) = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
let let
listSize = 9 grouped = toGroupedText groupIt (Set.size . snd) fst snd (Map.toList ngs)
all' = List.sortOn (Down . Set.size . snd . snd)
$ Map.toList ngs
(graphTerms, candiTerms) = List.splitAt listSize all' socialLists <- flowSocialList user NgramsTerms (Set.fromList $ Map.keys ngs)
pure $ Map.unionsWith (<>) [ toElements nt MapTerm graphTerms let
, toElements nt CandidateTerm candiTerms groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms ) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(graphTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
listSize = mapListSize - (List.length graphTerms)
(graphTerms', candiTerms) = List.splitAt listSize $ List.sortOn (Down . _gt_score) $ Map.elems tailTerms'
let result = Map.unionsWith (<>)
[ Map.fromList [(
NgramsTerms, (List.concat $ map toNgramsElement $ stopTerms)
<> (List.concat $ map toNgramsElement $ graphTerms)
<> (List.concat $ map toNgramsElement $ graphTerms')
<> (List.concat $ map toNgramsElement $ candiTerms)
)]
] ]
pure result
toElements :: Ord k => k -> ListType -> [(Text, b)] -> Map k [NgramsElement] toElements :: Ord k => k -> ListType -> [(Text, b)] -> Map k [NgramsElement]
toElements nType lType x = toElements nType lType x =
...@@ -119,16 +132,12 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -119,16 +132,12 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = groupStems' let grouped = toGroupedText (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) allTerms
$ map (\(t,d) -> let stem = ngramsGroup groupParams t
in ( stem
, GroupedText Nothing t d Set.empty (size t) stem Set.empty
)
) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, candidateTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList (stopTerms, candidateTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(groupedMono, groupedMult) = Map.partition (\gt -> gt ^. gt_size < 2) candidateTerms (groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
-- printDebug "\n * stopTerms * \n" stopTerms -- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates -- splitting monterms and multiterms to take proportional candidates
...@@ -163,7 +172,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -163,7 +172,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms
let let
mapGroups = Map.fromList mapGroups = Map.fromList
$ map (\g -> (_gt_stem g, g)) $ map (\g -> (g ^. gt_stem, g))
$ groupedMonoHead <> groupedMultHead $ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId -- grouping with Set NodeId
...@@ -231,7 +240,6 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -231,7 +240,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- Final Step building the Typed list -- Final Step building the Typed list
termListHead = maps <> cands termListHead = maps <> cands
where where
maps = set gt_listType (Just MapTerm) maps = set gt_listType (Just MapTerm)
<$> monoScoredInclHead <$> monoScoredInclHead
<> monoScoredExclHead <> monoScoredExclHead
...@@ -262,22 +270,6 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -262,22 +270,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n result \n" r -- printDebug "\n result \n" r
pure result pure result
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: [(Stem, GroupedText Double)] -> Map Stem (GroupedText Double)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
toNgramsElement :: GroupedText a -> [NgramsElement] toNgramsElement :: GroupedText a -> [NgramsElement]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment