Commit 5cf6f5da authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT/FIX] Stemming -> Parent/Children -> Patch ok

parent 0ba78c07
......@@ -22,8 +22,7 @@ import Data.Ord (Down(..))
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (both)
import Gargantext.API.Ngrams.Types (NgramsElement)
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.API.Ngrams.Types (NgramsElement, RepoCmdM, NgramsTerm(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.Prelude
......@@ -106,7 +105,7 @@ buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do
else printDebug "flowSocialList" ""
-}
let
groupedWithList = toGroupedTree groupParams socialLists allTerms
groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
{-
if nt == Sources -- Authors
then printDebug "groupedWithList" groupedWithList
......@@ -158,7 +157,9 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
(List.cycle [mempty])
)
let groupedWithList = toGroupedTree groupParams socialLists allTerms
let socialLists_Stemmed = addScoreStem groupParams (Set.map NgramsTerm $ Map.keysSet allTerms) socialLists
printDebug "socialLists_Stemmed" socialLists_Stemmed
let groupedWithList = toGroupedTree {- groupParams -} socialLists_Stemmed allTerms
(stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList
......
......@@ -31,23 +31,15 @@ import Gargantext.Prelude
import qualified Data.Map as Map
------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTree :: (Ord a, Monoid a, GroupWithStem a)
=> GroupParams
-> FlowCont Text FlowListScores
=> FlowCont Text FlowListScores
-> Map Text a
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores a)
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
toGroupedTree flc scores =
groupWithScores' flc scoring
where
flow1 = groupWithScores' flc scoring
scoring t = fromMaybe mempty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
------------------------------------------------------------------------
setScoresWithMap :: (Ord a, Ord b, Monoid b) => Map Text b
......
......@@ -18,20 +18,34 @@ module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (view, over)
import Data.Set (Set)
import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Monoid (mempty)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict.Patch as PatchMap
import qualified Data.Patch.Class as Patch (Replace(..))
import qualified Data.Set as Set
import qualified Data.Text as Text
------------------------------------------------------------------------
addScoreStem :: GroupParams
-> Set NgramsTerm
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
$ stemPatches groupParams ngrams
------------------------------------------------------------------------
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
......@@ -49,19 +63,6 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
| GroupIdentity
deriving (Eq)
------------------------------------------------------------------------
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
------------------------------------------------------------------------
groupWith :: GroupParams
-> Text
......@@ -75,8 +76,60 @@ groupWith (GroupParams l _m _n _) =
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
--------------------------------------------------------------------
stemPatches :: GroupParams
-> Set NgramsTerm
-> [(NgramsTerm, NgramsPatch)]
stemPatches groupParams = patches
. Map.fromListWith (<>)
. map (\ng@(NgramsTerm t) -> ( groupWith groupParams t
, Set.singleton ng)
)
. Set.toList
-- | For now all NgramsTerm which have same stem
-- are grouped together
-- Parent is taken arbitrarly for now (TODO use a score like occ)
patches :: Map Stem (Set NgramsTerm)
-> [(NgramsTerm, NgramsPatch)]
patches = catMaybes . map patch . Map.elems
patch :: Set NgramsTerm
-> Maybe (NgramsTerm, NgramsPatch)
patch s = case Set.size s > 1 of
False -> Nothing
True -> do
let ngrams = Set.toList s
parent <- headMay ngrams
let children = List.tail ngrams
pure (parent, toNgramsPatch children)
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Patch.Keep
where
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PatchMap.fromList
$ List.zip children (List.cycle [addPatch])
------------------------------------------------------------------------
-- 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< - 8< --
-- TODO remove below
------------------------------------------------------------------------
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
groupWithStem_SetNodeId :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
......@@ -223,3 +276,4 @@ mergeWith_a fun flc = FlowCont scores mempty
-}
......@@ -42,11 +42,12 @@ flowSocialListPriority :: FlowSocialListPriority -> [NodeMode]
flowSocialListPriority MySelfFirst = [Private{-, Shared, Public -}]
flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst
{-
-- | We keep the parents for all ngrams but terms
keepAllParents :: NgramsType -> KeepAllParents
keepAllParents NgramsTerms = KeepAllParents False
keepAllParents _ = KeepAllParents True
-}
------------------------------------------------------------------------
flowSocialList :: ( RepoCmdM env err m
......
......@@ -21,7 +21,7 @@ import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map.Strict as Map
-- TODO put this in Prelude maybe
-- TODO put this in Prelude
cons :: a -> [a]
cons a = [a]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment