Commit af817f81 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SocialList] refactored to be applied to others lists (needs Tree of grouped...

[SocialList] refactored to be applied to others lists (needs Tree of grouped terms next), not tested yet
parent 5c57aefc
......@@ -19,6 +19,7 @@ import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..))
......@@ -30,6 +31,7 @@ import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
......@@ -56,10 +58,12 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. map (stem l)
-- . take n
......@@ -68,6 +72,42 @@ ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------------
toGroupedText :: Ord b
=> (Text -> Text)
-> (a -> b)
-> (a -> Set Text)
-> (a -> Set NodeId)
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
where
group (t,d) = let t' = fun_stem t
in (t', GroupedText
Nothing
t
(fun_score d)
(fun_texts d)
(size t)
t'
(fun_nodeIds d)
)
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
......
......@@ -29,7 +29,6 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group
......@@ -58,10 +57,11 @@ buildNgramsLists :: ( RepoCmdM env err m
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid identity)
[Authors, Sources, Institutes]
othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity))
[(Authors, 5), (Sources, 7), (Institutes, 9)]
pure $ Map.unions $ othersTerms <> [ngTerms]
type MapListSize = Int
buildNgramsOthersList ::( HasNodeError err
, CmdM env err m
......@@ -71,21 +71,34 @@ buildNgramsOthersList ::( HasNodeError err
=> User
-> UserCorpusId
-> (Text -> Text)
-> NgramsType
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList _user uCid groupIt nt = do
buildNgramsOthersList user uCid groupIt (nt, mapListSize) = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
let
listSize = 9
all' = List.sortOn (Down . Set.size . snd . snd)
$ Map.toList ngs
grouped = toGroupedText groupIt (Set.size . snd) fst snd (Map.toList ngs)
(graphTerms, candiTerms) = List.splitAt listSize all'
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ Map.keys ngs)
pure $ Map.unionsWith (<>) [ toElements nt MapTerm graphTerms
, toElements nt CandidateTerm candiTerms
let
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms ) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(graphTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
listSize = mapListSize - (List.length graphTerms)
(graphTerms', candiTerms) = List.splitAt listSize $ List.sortOn (Down . _gt_score) $ Map.elems tailTerms'
let result = Map.unionsWith (<>)
[ Map.fromList [(
NgramsTerms, (List.concat $ map toNgramsElement $ stopTerms)
<> (List.concat $ map toNgramsElement $ graphTerms)
<> (List.concat $ map toNgramsElement $ graphTerms')
<> (List.concat $ map toNgramsElement $ candiTerms)
)]
]
pure result
toElements :: Ord k => k -> ListType -> [(Text, b)] -> Map k [NgramsElement]
toElements nType lType x =
......@@ -119,16 +132,12 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- Grouping the ngrams and keeping the maximum score for label
let grouped = groupStems'
$ map (\(t,d) -> let stem = ngramsGroup groupParams t
in ( stem
, GroupedText Nothing t d Set.empty (size t) stem Set.empty
)
) allTerms
let grouped = toGroupedText (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, candidateTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(groupedMono, groupedMult) = Map.partition (\gt -> gt ^. gt_size < 2) candidateTerms
(groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
-- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
......@@ -163,7 +172,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms
let
mapGroups = Map.fromList
$ map (\g -> (_gt_stem g, g))
$ map (\g -> (g ^. gt_stem, g))
$ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId
......@@ -231,7 +240,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- Final Step building the Typed list
termListHead = maps <> cands
where
maps = set gt_listType (Just MapTerm)
<$> monoScoredInclHead
<> monoScoredExclHead
......@@ -262,22 +270,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n result \n" r
pure result
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: [(Stem, GroupedText Double)] -> Map Stem (GroupedText Double)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
toNgramsElement :: GroupedText a -> [NgramsElement]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment