Commit f73a9d90 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] FlowList integration to Terms with instances

parent a188045f
...@@ -61,7 +61,7 @@ buildNgramsLists :: ( RepoCmdM env err m ...@@ -61,7 +61,7 @@ buildNgramsLists :: ( RepoCmdM env err m
-> MasterCorpusId -> MasterCorpusId
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp ngTerms <- buildNgramsTermsList user uCid mCid gp (NgramsTerms, MapListSize 350)
othersTerms <- mapM (buildNgramsOthersList user uCid GroupIdentity) othersTerms <- mapM (buildNgramsOthersList user uCid GroupIdentity)
[ (Authors , MapListSize 9) [ (Authors , MapListSize 9)
, (Sources , MapListSize 9) , (Sources , MapListSize 9)
...@@ -83,14 +83,14 @@ buildNgramsOthersList ::( HasNodeError err ...@@ -83,14 +83,14 @@ buildNgramsOthersList ::( HasNodeError err
-> GroupParams -> GroupParams
-> (NgramsType, MapListSize) -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do
ngs' :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt allTerms :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
-- | PrivateFirst for first developments since Public NodeMode is not implemented yet -- | PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists' :: FlowCont Text FlowListScores socialLists' :: FlowCont Text FlowListScores
<- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty <- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
$ Map.fromList $ Map.fromList
$ List.zip (Map.keys ngs') $ List.zip (Map.keys allTerms)
(List.cycle [mempty]) (List.cycle [mempty])
) )
{- {-
...@@ -100,10 +100,12 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do ...@@ -100,10 +100,12 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-} -}
let let
groupedWithList = toGroupedTreeText groupIt socialLists' ngs' groupedWithList = toGroupedTree groupParams socialLists' allTerms
{-
printDebug "groupedWithList" printDebug "groupedWithList"
$ view flc_cont groupedWithList $ view flc_cont groupedWithList
-}
let let
(stopTerms, tailTerms) = Map.partition ((== Just StopTerm) . viewListType) $ view flc_scores groupedWithList (stopTerms, tailTerms) = Map.partition ((== Just StopTerm) . viewListType) $ view flc_scores groupedWithList
...@@ -132,23 +134,40 @@ buildNgramsTermsList :: ( HasNodeError err ...@@ -132,23 +134,40 @@ buildNgramsTermsList :: ( HasNodeError err
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> GroupParams -> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user uCid mCid groupParams = do buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- Computing global speGen score -- Computing global speGen score
allTerms :: Map Text Double <- getTficf uCid mCid NgramsTerms allTerms :: Map Text Double <- getTficf uCid mCid nt
-- printDebug "head candidates" (List.take 10 $ allTerms) -- | PrivateFirst for first developments since Public NodeMode is not implemented yet
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms) socialLists' :: FlowCont Text FlowListScores
<- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
$ Map.fromList
$ List.zip (Map.keys allTerms)
(List.cycle [mempty])
)
{-
printDebug "flowSocialList'"
$ Map.filter (not . ((==) Map.empty) . (view fls_parents))
$ view flc_scores socialLists'
let
groupedWithList = toGroupedTree groupParams socialLists' allTerms
-}
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- First remove stops terms -- First remove stops terms
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst $ Map.toList allTerms) socialLists <- flowSocialList user nt (Set.fromList $ map fst $ Map.toList allTerms)
-- printDebug "\n * socialLists * \n" socialLists
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
(stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType) groupedWithList (stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType) groupedWithList
(groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms (groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
...@@ -188,7 +207,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -188,7 +207,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
mapTextDocIds <- getNodesByNgramsOnlyUser uCid mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId] [userListId, masterListId]
NgramsTerms nt
selectedTerms selectedTerms
let let
...@@ -284,9 +303,9 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -284,9 +303,9 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "multScoredExclTail" multScoredExclTail -- printDebug "multScoredExclTail" multScoredExclTail
let result = Map.unionsWith (<>) let result = Map.unionsWith (<>)
[ Map.fromList [( NgramsTerms, (List.concat $ map toNgramsElement $ termListHead) [ Map.fromList [( nt, (List.concat $ map toNgramsElement $ termListHead)
<> (List.concat $ map toNgramsElement $ termListTail) <> (List.concat $ map toNgramsElement $ termListTail)
<> (List.concat $ map toNgramsElement $ stopTerms) <> (List.concat $ map toNgramsElement $ stopTerms)
)] )]
] ]
-- printDebug "\n result \n" r -- printDebug "\n result \n" r
......
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group module Gargantext.Core.Text.List.Group
where where
...@@ -22,6 +22,7 @@ import Control.Lens (set, view) ...@@ -22,6 +22,7 @@ import Control.Lens (set, view)
import Data.Set (Set) import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Monoid (mempty)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
...@@ -36,23 +37,50 @@ import qualified Data.List as List ...@@ -36,23 +37,50 @@ import qualified Data.List as List
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO add group with stemming -- | TODO add group with stemming
toGroupedTreeText :: GroupParams
class ToGroupedTree a b | a -> b where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores
-> a
-> FlowCont Text (GroupedTreeScores b)
instance ToGroupedTree (Map Text (Set NodeId)) (Set NodeId)
where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores -> FlowCont Text FlowListScores
-> Map Text (Set NodeId) -> Map Text (Set NodeId)
-- -> Map Text (GroupedTreeScores (Set NodeId)) -- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
toGroupedTreeText groupParams flc scores = {-view flc_scores-} flow2 toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
where where
flow1 = groupWithScores' flc scoring flow1 = groupWithScores'' flc scoring
scoring t = fromMaybe Set.empty $ Map.lookup t scores scoring t = fromMaybe Set.empty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1 True -> flow1
False -> groupWithStem' groupParams flow1 False -> groupWithStem' groupParams flow1
{-
instance ToGroupedTree (Map Text Double) Double
where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores
-> Map Text Double
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores Double)
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
where
flow1 = groupWithScores'' flc scoring
scoring t = fromMaybe mempty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- | TODO To be removed -- | TODO To be removed
toGroupedText :: GroupedTextParams a b toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores -> Map Text FlowListScores
......
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group.WithScores module Gargantext.Core.Text.List.Group.WithScores
where where
...@@ -31,8 +32,19 @@ import qualified Data.Set as Set ...@@ -31,8 +32,19 @@ import qualified Data.Set as Set
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
class GroupWithScore a where
groupWithScores'' :: FlowCont Text FlowListScores
-> (Text -> a) -- Map Text (Set NodeId)
-> FlowCont Text (GroupedTreeScores a)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main function -- | Main function
instance GroupWithScore (Set NodeId) where
groupWithScores'' = groupWithScores'
groupWithScores' :: FlowCont Text FlowListScores groupWithScores' :: FlowCont Text FlowListScores
-> (Text -> Set NodeId) -- Map Text (Set NodeId) -> (Text -> Set NodeId) -- Map Text (Set NodeId)
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
......
...@@ -35,6 +35,7 @@ import qualified Data.Map as Map ...@@ -35,6 +35,7 @@ import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text import qualified Data.Text as Text
------------------------------------------------------------------------
-- | Main Types -- | Main Types
data StopSize = StopSize {unStopSize :: !Int} data StopSize = StopSize {unStopSize :: !Int}
deriving (Eq) deriving (Eq)
...@@ -52,16 +53,55 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -52,16 +53,55 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
deriving (Eq) deriving (Eq)
------------------------------------------------------------------------ ------------------------------------------------------------------------
groupWithStem' :: GroupParams class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
------------------------------------------------------------------------
groupWith :: GroupParams
-> Text
-> Text
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupWithStem_SetNodeId :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId)) -> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' g flc groupWithStem_SetNodeId g flc
| g == GroupIdentity = FlowCont ( (<>) | g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc) (view flc_scores flc)
(view flc_cont flc) (view flc_cont flc)
) mempty ) mempty
| otherwise = mergeWith (groupWith g) flc | otherwise = mergeWith (groupWith g) flc
groupWithStem_Double :: GroupParams
-> FlowCont Text (GroupedTreeScores Double)
-> FlowCont Text (GroupedTreeScores Double)
groupWithStem_Double g flc
| g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc)
(view flc_cont flc)
) mempty
| otherwise = mergeWith_Double (groupWith g) flc
-- | MergeWith : with stem, we always have an answer -- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation -- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text) mergeWith :: (Text -> Text)
...@@ -103,18 +143,56 @@ mergeWith fun flc = FlowCont scores mempty ...@@ -103,18 +143,56 @@ mergeWith fun flc = FlowCont scores mempty
children = List.concat $ map mapStem (Map.toList $ view gts'_children g) children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
groupWith :: GroupParams -- | MergeWith : with stem, we always have an answer
-> Text -- if Maybe lems then we should add it to continuation
-> Text mergeWith_Double :: (Text -> Text)
groupWith GroupIdentity = identity -> FlowCont Text (GroupedTreeScores Double)
groupWith (GroupParams l _m _n _) = -> FlowCont Text (GroupedTreeScores Double)
Text.intercalate " " mergeWith_Double fun flc = FlowCont scores mempty
. map (stem l) where
-- . take n
. List.sort scores :: Map Text (GroupedTreeScores Double)
-- . (List.filter (\t -> Text.length t > m)) scores = foldl' (alter (mapStems scores')) scores' cont'
. Text.splitOn " " where
. Text.replace "-" " " scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO insert at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores Double)
-> (Text, GroupedTreeScores Double)
-> Map Text (GroupedTreeScores Double)
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores Double)
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores Double)
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<-- -- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment