Commit f73a9d90 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] FlowList integration to Terms with instances

parent a188045f
......@@ -61,7 +61,7 @@ buildNgramsLists :: ( RepoCmdM env err m
-> MasterCorpusId
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp
ngTerms <- buildNgramsTermsList user uCid mCid gp (NgramsTerms, MapListSize 350)
othersTerms <- mapM (buildNgramsOthersList user uCid GroupIdentity)
[ (Authors , MapListSize 9)
, (Sources , MapListSize 9)
......@@ -83,14 +83,14 @@ buildNgramsOthersList ::( HasNodeError err
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
ngs' :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
buildNgramsOthersList user uCid groupParams (nt, MapListSize mapListSize) = do
allTerms :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
-- | PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists' :: FlowCont Text FlowListScores
<- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
$ Map.fromList
$ List.zip (Map.keys ngs')
$ List.zip (Map.keys allTerms)
(List.cycle [mempty])
)
{-
......@@ -100,10 +100,12 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
-}
let
groupedWithList = toGroupedTreeText groupIt socialLists' ngs'
groupedWithList = toGroupedTree groupParams socialLists' allTerms
{-
printDebug "groupedWithList"
$ view flc_cont groupedWithList
-}
let
(stopTerms, tailTerms) = Map.partition ((== Just StopTerm) . viewListType) $ view flc_scores groupedWithList
......@@ -132,23 +134,40 @@ buildNgramsTermsList :: ( HasNodeError err
-> UserCorpusId
-> MasterCorpusId
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user uCid mCid groupParams = do
buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- Computing global speGen score
allTerms :: Map Text Double <- getTficf uCid mCid NgramsTerms
allTerms :: Map Text Double <- getTficf uCid mCid nt
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- | PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists' :: FlowCont Text FlowListScores
<- flowSocialList' MySelfFirst user nt ( FlowCont Map.empty
$ Map.fromList
$ List.zip (Map.keys allTerms)
(List.cycle [mempty])
)
{-
printDebug "flowSocialList'"
$ Map.filter (not . ((==) Map.empty) . (view fls_parents))
$ view flc_scores socialLists'
let
groupedWithList = toGroupedTree groupParams socialLists' allTerms
-}
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- First remove stops terms
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst $ Map.toList allTerms)
-- printDebug "\n * socialLists * \n" socialLists
socialLists <- flowSocialList user nt (Set.fromList $ map fst $ Map.toList allTerms)
-- Grouping the ngrams and keeping the maximum score for label
let grouped = groupedTextWithStem ( GroupedTextParams (groupWith groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
(stopTerms, candidateTerms) = Map.partition ((== Just StopTerm) . viewListType) groupedWithList
(groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
......@@ -188,7 +207,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId]
NgramsTerms
nt
selectedTerms
let
......@@ -284,9 +303,9 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "multScoredExclTail" multScoredExclTail
let result = Map.unionsWith (<>)
[ Map.fromList [( NgramsTerms, (List.concat $ map toNgramsElement $ termListHead)
<> (List.concat $ map toNgramsElement $ termListTail)
<> (List.concat $ map toNgramsElement $ stopTerms)
[ Map.fromList [( nt, (List.concat $ map toNgramsElement $ termListHead)
<> (List.concat $ map toNgramsElement $ termListTail)
<> (List.concat $ map toNgramsElement $ stopTerms)
)]
]
-- printDebug "\n result \n" r
......
......@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group
where
......@@ -22,6 +22,7 @@ import Control.Lens (set, view)
import Data.Set (Set)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid (mempty)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
......@@ -36,23 +37,50 @@ import qualified Data.List as List
------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTreeText :: GroupParams
class ToGroupedTree a b | a -> b where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores
-> a
-> FlowCont Text (GroupedTreeScores b)
instance ToGroupedTree (Map Text (Set NodeId)) (Set NodeId)
where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores
-> Map Text (Set NodeId)
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
toGroupedTreeText groupParams flc scores = {-view flc_scores-} flow2
where
flow1 = groupWithScores' flc scoring
scoring t = fromMaybe Set.empty $ Map.lookup t scores
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
where
flow1 = groupWithScores'' flc scoring
scoring t = fromMaybe Set.empty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
{-
instance ToGroupedTree (Map Text Double) Double
where
toGroupedTree :: GroupParams
-> FlowCont Text FlowListScores
-> Map Text Double
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores Double)
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
where
flow1 = groupWithScores'' flc scoring
scoring t = fromMaybe mempty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- | TODO To be removed
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
......
......@@ -10,6 +10,7 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group.WithScores
where
......@@ -31,8 +32,19 @@ import qualified Data.Set as Set
------------------------------------------------------------------------
------------------------------------------------------------------------
class GroupWithScore a where
groupWithScores'' :: FlowCont Text FlowListScores
-> (Text -> a) -- Map Text (Set NodeId)
-> FlowCont Text (GroupedTreeScores a)
------------------------------------------------------------------------
-- | Main function
instance GroupWithScore (Set NodeId) where
groupWithScores'' = groupWithScores'
groupWithScores' :: FlowCont Text FlowListScores
-> (Text -> Set NodeId) -- Map Text (Set NodeId)
-> FlowCont Text (GroupedTreeScores (Set NodeId))
......
......@@ -35,6 +35,7 @@ import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
------------------------------------------------------------------------
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
deriving (Eq)
......@@ -52,16 +53,55 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
deriving (Eq)
------------------------------------------------------------------------
groupWithStem' :: GroupParams
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
------------------------------------------------------------------------
groupWith :: GroupParams
-> Text
-> Text
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupWithStem_SetNodeId :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' g flc
groupWithStem_SetNodeId g flc
| g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc)
(view flc_cont flc)
) mempty
| otherwise = mergeWith (groupWith g) flc
groupWithStem_Double :: GroupParams
-> FlowCont Text (GroupedTreeScores Double)
-> FlowCont Text (GroupedTreeScores Double)
groupWithStem_Double g flc
| g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc)
(view flc_cont flc)
) mempty
| otherwise = mergeWith_Double (groupWith g) flc
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text)
......@@ -103,18 +143,56 @@ mergeWith fun flc = FlowCont scores mempty
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
groupWith :: GroupParams
-> Text
-> Text
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith_Double :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores Double)
-> FlowCont Text (GroupedTreeScores Double)
mergeWith_Double fun flc = FlowCont scores mempty
where
scores :: Map Text (GroupedTreeScores Double)
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO insert at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores Double)
-> (Text, GroupedTreeScores Double)
-> Map Text (GroupedTreeScores Double)
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores Double)
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores Double)
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment