Commit 86473b50 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] MergeWith stem done (before flow integration).

parent d017571f
...@@ -63,7 +63,7 @@ buildNgramsLists :: ( RepoCmdM env err m ...@@ -63,7 +63,7 @@ buildNgramsLists :: ( RepoCmdM env err m
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity)) othersTerms <- mapM (buildNgramsOthersList user uCid GroupIdentity)
[ (Authors , MapListSize 9) [ (Authors , MapListSize 9)
, (Sources , MapListSize 9) , (Sources , MapListSize 9)
, (Institutes, MapListSize 9) , (Institutes, MapListSize 9)
...@@ -81,7 +81,7 @@ buildNgramsOthersList ::( HasNodeError err ...@@ -81,7 +81,7 @@ buildNgramsOthersList ::( HasNodeError err
) )
=> User => User
-> UserCorpusId -> UserCorpusId
-> (Text -> Text) -> GroupParams
-> (NgramsType, MapListSize) -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
...@@ -102,8 +102,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do ...@@ -102,8 +102,7 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-} -}
let let
groupParams = GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-} groupedWithList = toGroupedTreeText groupIt socialLists' ngs'
groupedWithList = toGroupedTreeText groupParams socialLists' ngs'
{- {-
printDebug "groupedWithList" printDebug "groupedWithList"
...@@ -153,7 +152,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -153,7 +152,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * socialLists * \n" socialLists -- printDebug "\n * socialLists * \n" socialLists
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = groupedTextWithStem ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
...@@ -190,7 +189,10 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -190,7 +189,10 @@ buildNgramsTermsList user uCid mCid groupParams = do
userListId <- defaultList uCid userListId <- defaultList uCid
masterListId <- defaultList mCid masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId]
NgramsTerms
selectedTerms
let let
mapGroups = Map.fromList mapGroups = Map.fromList
...@@ -199,7 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -199,7 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- grouping with Set NodeId -- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> contextsAdded = foldl' (\mapGroups' k ->
let k' = ngramsGroup groupParams k in let k' = groupWith groupParams k in
case Map.lookup k' mapGroups' of case Map.lookup k' mapGroups' of
Nothing -> mapGroups' Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of Just g -> case Map.lookup k mapTextDocIds of
......
...@@ -35,38 +35,32 @@ import qualified Data.Map as Map ...@@ -35,38 +35,32 @@ import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
------------------------------------------------------------------------ ------------------------------------------------------------------------
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
-- | TODO add group with stemming -- | TODO add group with stemming
toGroupedTreeText :: GroupedTextParams a b toGroupedTreeText :: GroupParams
-> FlowCont Text FlowListScores -> FlowCont Text FlowListScores
-> Map Text (Set NodeId) -> Map Text (Set NodeId)
-> Map Text (GroupedTreeScores (Set NodeId)) -> Map Text (GroupedTreeScores (Set NodeId))
toGroupedTreeText _groupParams flc scores = view flc_scores flow1 toGroupedTreeText groupParams flc scores = view flc_scores flow2
where where
flow1 = groupWithScores' flc scoring flow1 = groupWithScores' flc scoring
scoring t = fromMaybe Set.empty $ Map.lookup t scores scoring t = fromMaybe Set.empty $ Map.lookup t scores
{- flow2 = case (view flc_cont flow1) == Map.empty of
flow2 = case flc_cont flow1 == Set.empty of True -> flow1
True -> view flc_scores flow1
False -> groupWithStem' groupParams flow1 False -> groupWithStem' groupParams flow1
groupWithStem' :: GroupedTextParams a b
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' _groupParams = identity
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO To be removed -- | TODO To be removed
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
addListType :: Map Text ListType -> GroupedText a -> GroupedText a addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g addListType m g = set gt_listType (hasListType m g) g
where where
......
...@@ -17,7 +17,7 @@ Portability : POSIX ...@@ -17,7 +17,7 @@ Portability : POSIX
module Gargantext.Core.Text.List.Group.WithStem module Gargantext.Core.Text.List.Group.WithStem
where where
import Control.Lens (makeLenses, view) import Control.Lens (makeLenses, view, over)
import Data.Set (Set) import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
...@@ -26,6 +26,7 @@ import Gargantext.Core.Text (size) ...@@ -26,6 +26,7 @@ import Gargantext.Core.Text (size)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.WithScores import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -48,6 +49,70 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -48,6 +49,70 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
| GroupIdentity | GroupIdentity
------------------------------------------------------------------------
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' = mergeWith . groupWith
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
mergeWith fun flc = FlowCont scores Map.empty
where
scores :: Map Text (GroupedTreeScores (Set NodeId))
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO inserti at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores (Set NodeId))
-> (Text, GroupedTreeScores (Set NodeId))
-> Map Text (GroupedTreeScores (Set NodeId))
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores (Set NodeId))
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores (Set NodeId))
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
groupWith :: GroupParams
-> Text
-> Text
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-------------------------------------------------------------------
-- TODO to remove
data GroupedTextParams a b = data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b , _gt_fun_score :: a -> b
...@@ -57,7 +122,7 @@ data GroupedTextParams a b = ...@@ -57,7 +122,7 @@ data GroupedTextParams a b =
} }
makeLenses 'GroupedTextParams makeLenses 'GroupedTextParams
------------------------------------------------------------------------
groupWithStem :: {- ( HasNgrams a groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b , HasGroupWithScores a b
, Semigroup a , Semigroup a
...@@ -78,19 +143,6 @@ scores2groupedText t g = GroupedText (view gts_listType g) ...@@ -78,19 +143,6 @@ scores2groupedText t g = GroupedText (view gts_listType g)
(view gts_score g) (view gts_score g)
------------------------------------------------------------------------ ------------------------------------------------------------------------
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------ ------------------------------------------------------------------------
groupedTextWithStem :: Ord b groupedTextWithStem :: Ord b
=> GroupedTextParams a b => GroupedTextParams a b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment