Commit 86473b50 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] MergeWith stem done (before flow integration).

parent d017571f
......@@ -63,7 +63,7 @@ buildNgramsLists :: ( RepoCmdM env err m
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity))
othersTerms <- mapM (buildNgramsOthersList user uCid GroupIdentity)
[ (Authors , MapListSize 9)
, (Sources , MapListSize 9)
, (Institutes, MapListSize 9)
......@@ -81,7 +81,7 @@ buildNgramsOthersList ::( HasNodeError err
)
=> User
-> UserCorpusId
-> (Text -> Text)
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
......@@ -102,8 +102,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-}
let
groupParams = GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-}
groupedWithList = toGroupedTreeText groupParams socialLists' ngs'
groupedWithList = toGroupedTreeText groupIt socialLists' ngs'
{-
printDebug "groupedWithList"
......@@ -153,7 +152,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * socialLists * \n" socialLists
-- Grouping the ngrams and keeping the maximum score for label
let grouped = groupedTextWithStem ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped
......@@ -190,7 +189,10 @@ buildNgramsTermsList user uCid mCid groupParams = do
userListId <- defaultList uCid
masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms
mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId]
NgramsTerms
selectedTerms
let
mapGroups = Map.fromList
......@@ -199,7 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k ->
let k' = ngramsGroup groupParams k in
let k' = groupWith groupParams k in
case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
......
......@@ -35,38 +35,32 @@ import qualified Data.Map as Map
import qualified Data.List as List
------------------------------------------------------------------------
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
-- | TODO add group with stemming
toGroupedTreeText :: GroupedTextParams a b
toGroupedTreeText :: GroupParams
-> FlowCont Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTreeScores (Set NodeId))
toGroupedTreeText _groupParams flc scores = view flc_scores flow1
toGroupedTreeText groupParams flc scores = view flc_scores flow2
where
flow1 = groupWithScores' flc scoring
scoring t = fromMaybe Set.empty $ Map.lookup t scores
{-
flow2 = case flc_cont flow1 == Set.empty of
True -> view flc_scores flow1
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
groupWithStem' :: GroupedTextParams a b
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' _groupParams = identity
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO To be removed
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
......
......@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (makeLenses, view)
import Control.Lens (makeLenses, view, over)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
......@@ -26,6 +26,7 @@ import Gargantext.Core.Text (size)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -48,6 +49,70 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
| GroupIdentity
------------------------------------------------------------------------
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' = mergeWith . groupWith
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
mergeWith fun flc = FlowCont scores Map.empty
where
scores :: Map Text (GroupedTreeScores (Set NodeId))
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO inserti at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores (Set NodeId))
-> (Text, GroupedTreeScores (Set NodeId))
-> Map Text (GroupedTreeScores (Set NodeId))
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores (Set NodeId))
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores (Set NodeId))
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
groupWith :: GroupParams
-> Text
-> Text
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-------------------------------------------------------------------
-- TODO to remove
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
......@@ -57,7 +122,7 @@ data GroupedTextParams a b =
}
makeLenses 'GroupedTextParams
------------------------------------------------------------------------
groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b
, Semigroup a
......@@ -78,19 +143,6 @@ scores2groupedText t g = GroupedText (view gts_listType g)
(view gts_score g)
------------------------------------------------------------------------
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment