Commit 5cf6f5da authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT/FIX] Stemming -> Parent/Children -> Patch ok

parent 0ba78c07
...@@ -22,8 +22,7 @@ import Data.Ord (Down(..)) ...@@ -22,8 +22,7 @@ import Data.Ord (Down(..))
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
import Gargantext.API.Ngrams.Types (NgramsElement) import Gargantext.API.Ngrams.Types (NgramsElement, RepoCmdM, NgramsTerm(..))
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
...@@ -106,7 +105,7 @@ buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do ...@@ -106,7 +105,7 @@ buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do
else printDebug "flowSocialList" "" else printDebug "flowSocialList" ""
-} -}
let let
groupedWithList = toGroupedTree groupParams socialLists allTerms groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
{- {-
if nt == Sources -- Authors if nt == Sources -- Authors
then printDebug "groupedWithList" groupedWithList then printDebug "groupedWithList" groupedWithList
...@@ -158,7 +157,9 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -158,7 +157,9 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
(List.cycle [mempty]) (List.cycle [mempty])
) )
let groupedWithList = toGroupedTree groupParams socialLists allTerms let socialLists_Stemmed = addScoreStem groupParams (Set.map NgramsTerm $ Map.keysSet allTerms) socialLists
printDebug "socialLists_Stemmed" socialLists_Stemmed
let groupedWithList = toGroupedTree {- groupParams -} socialLists_Stemmed allTerms
(stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType) (stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList $ view flc_scores groupedWithList
......
...@@ -31,23 +31,15 @@ import Gargantext.Prelude ...@@ -31,23 +31,15 @@ import Gargantext.Prelude
import qualified Data.Map as Map import qualified Data.Map as Map
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTree :: (Ord a, Monoid a, GroupWithStem a) toGroupedTree :: (Ord a, Monoid a, GroupWithStem a)
=> GroupParams => FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
-> Map Text a -> Map Text a
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores a) -> FlowCont Text (GroupedTreeScores a)
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2 toGroupedTree flc scores =
groupWithScores' flc scoring
where where
flow1 = groupWithScores' flc scoring
scoring t = fromMaybe mempty $ Map.lookup t scores scoring t = fromMaybe mempty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
------------------------------------------------------------------------ ------------------------------------------------------------------------
setScoresWithMap :: (Ord a, Ord b, Monoid b) => Map Text b setScoresWithMap :: (Ord a, Ord b, Monoid b) => Map Text b
......
...@@ -18,20 +18,34 @@ module Gargantext.Core.Text.List.Group.WithStem ...@@ -18,20 +18,34 @@ module Gargantext.Core.Text.List.Group.WithStem
where where
import Control.Lens (view, over) import Control.Lens (view, over)
import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Monoid (mempty) import Data.Monoid (mempty)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict.Patch as PatchMap
import qualified Data.Patch.Class as Patch (Replace(..))
import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
------------------------------------------------------------------------
addScoreStem :: GroupParams
-> Set NgramsTerm
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
$ stemPatches groupParams ngrams
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Types -- | Main Types
data StopSize = StopSize {unStopSize :: !Int} data StopSize = StopSize {unStopSize :: !Int}
...@@ -49,19 +63,6 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -49,19 +63,6 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
| GroupIdentity | GroupIdentity
deriving (Eq) deriving (Eq)
------------------------------------------------------------------------
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
------------------------------------------------------------------------ ------------------------------------------------------------------------
groupWith :: GroupParams groupWith :: GroupParams
-> Text -> Text
...@@ -75,8 +76,60 @@ groupWith (GroupParams l _m _n _) = ...@@ -75,8 +76,60 @@ groupWith (GroupParams l _m _n _) =
-- . (List.filter (\t -> Text.length t > m)) -- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " " . Text.splitOn " "
. Text.replace "-" " " . Text.replace "-" " "
--------------------------------------------------------------------
stemPatches :: GroupParams
-> Set NgramsTerm
-> [(NgramsTerm, NgramsPatch)]
stemPatches groupParams = patches
. Map.fromListWith (<>)
. map (\ng@(NgramsTerm t) -> ( groupWith groupParams t
, Set.singleton ng)
)
. Set.toList
-- | For now all NgramsTerm which have same stem
-- are grouped together
-- Parent is taken arbitrarly for now (TODO use a score like occ)
patches :: Map Stem (Set NgramsTerm)
-> [(NgramsTerm, NgramsPatch)]
patches = catMaybes . map patch . Map.elems
patch :: Set NgramsTerm
-> Maybe (NgramsTerm, NgramsPatch)
patch s = case Set.size s > 1 of
False -> Nothing
True -> do
let ngrams = Set.toList s
parent <- headMay ngrams
let children = List.tail ngrams
pure (parent, toNgramsPatch children)
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Patch.Keep
where
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PatchMap.fromList
$ List.zip children (List.cycle [addPatch])
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< --
-- TODO remove below
------------------------------------------------------------------------
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
groupWithStem_SetNodeId :: GroupParams groupWithStem_SetNodeId :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
...@@ -223,3 +276,4 @@ mergeWith_a fun flc = FlowCont scores mempty ...@@ -223,3 +276,4 @@ mergeWith_a fun flc = FlowCont scores mempty
-} -}
...@@ -42,11 +42,12 @@ flowSocialListPriority :: FlowSocialListPriority -> [NodeMode] ...@@ -42,11 +42,12 @@ flowSocialListPriority :: FlowSocialListPriority -> [NodeMode]
flowSocialListPriority MySelfFirst = [Private{-, Shared, Public -}] flowSocialListPriority MySelfFirst = [Private{-, Shared, Public -}]
flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst
{-
-- | We keep the parents for all ngrams but terms -- | We keep the parents for all ngrams but terms
keepAllParents :: NgramsType -> KeepAllParents keepAllParents :: NgramsType -> KeepAllParents
keepAllParents NgramsTerms = KeepAllParents False keepAllParents NgramsTerms = KeepAllParents False
keepAllParents _ = KeepAllParents True keepAllParents _ = KeepAllParents True
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowSocialList :: ( RepoCmdM env err m flowSocialList :: ( RepoCmdM env err m
......
...@@ -21,7 +21,7 @@ import Gargantext.Prelude ...@@ -21,7 +21,7 @@ import Gargantext.Prelude
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
-- TODO put this in Prelude maybe -- TODO put this in Prelude
cons :: a -> [a] cons :: a -> [a]
cons a = [a] cons a = [a]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment