Commit 8f1c001b authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-social-list' into dev

parents 82f3efed 62bd8d8c
...@@ -28,7 +28,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) ...@@ -28,7 +28,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year) import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
......
...@@ -12,26 +12,23 @@ Main specifications to index a corpus with a term list ...@@ -12,26 +12,23 @@ Main specifications to index a corpus with a term list
-} -}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-} {-# LANGUAGE Strict #-}
module Main where module Main where
import Data.ByteString.Lazy (writeFile) import Data.ByteString.Lazy (writeFile)
import Data.Maybe (catMaybes)
import Data.Text (pack) import Data.Text (pack)
import qualified Data.Text as DT import qualified Data.Text as DT
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
import Control.Monad (zipWithM) import Control.Monad (zipWithM)
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Data.Map (Map) import Data.Map (Map)
import qualified Data.IntMap as DIM
import qualified Data.Map as DM import qualified Data.Map as DM
import GHC.Generics import GHC.Generics
...@@ -44,7 +41,6 @@ import System.IO (hPutStr, hFlush, stderr) ...@@ -44,7 +41,6 @@ import System.IO (hPutStr, hFlush, stderr)
import System.Environment import System.Environment
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Concurrent (getNumCapabilities, myThreadId, threadCapability) import Control.Concurrent (getNumCapabilities, myThreadId, threadCapability)
import Prelude ((>>))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core import Gargantext.Core
...@@ -53,7 +49,7 @@ import Gargantext.Core.Text.Terms ...@@ -53,7 +49,7 @@ import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year) import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms) import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs) import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......
...@@ -31,7 +31,7 @@ import Gargantext.Prelude ...@@ -31,7 +31,7 @@ import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year) import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker import Gargantext.Core.Viz.Phylo.LevelMaker
......
...@@ -73,7 +73,7 @@ library: ...@@ -73,7 +73,7 @@ library:
- Gargantext.Core.Text.Corpus.API - Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.Parsers.CSV - Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Examples - Gargantext.Core.Text.Examples
- Gargantext.Core.Text.List.CSV - Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics - Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.TFICF - Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar - Gargantext.Core.Text.Metrics.CharByChar
......
...@@ -9,16 +9,18 @@ Portability : POSIX ...@@ -9,16 +9,18 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List module Gargantext.Core.Text.List
where where
import Control.Lens ((^.), set) import Control.Lens ((^.), set, view)
import Data.Maybe (fromMaybe, catMaybes) import Data.Maybe (fromMaybe, catMaybes)
import Data.Ord (Down(..)) import Data.Ord (Down(..))
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Char as Char import qualified Data.Char as Char
import qualified Data.List as List import qualified Data.List as List
...@@ -29,12 +31,15 @@ import qualified Data.Text as Text ...@@ -29,12 +31,15 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList) import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM) import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw)
import Gargantext.Core.Text.List.Social.Scores -- (FlowListScores)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Prelude (CmdM) import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultList)
...@@ -66,7 +71,7 @@ buildNgramsLists user gp uCid mCid = do ...@@ -66,7 +71,7 @@ buildNgramsLists user gp uCid mCid = do
pure $ Map.unions $ [ngTerms] <> othersTerms pure $ Map.unions $ [ngTerms] <> othersTerms
data MapListSize = MapListSize Int data MapListSize = MapListSize { unMapListSize :: !Int }
buildNgramsOthersList ::( HasNodeError err buildNgramsOthersList ::( HasNodeError err
, CmdM env err m , CmdM env err m
...@@ -79,29 +84,38 @@ buildNgramsOthersList ::( HasNodeError err ...@@ -79,29 +84,38 @@ buildNgramsOthersList ::( HasNodeError err
-> (NgramsType, MapListSize) -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
ngs' :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt socialLists' :: Map Text FlowListScores
<- flowSocialList' MySelfFirst user nt (Set.fromList $ Map.keys ngs')
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
let printDebug "flowSocialList'" (Map.filter (not . ((==) Map.empty) . view fls_parents) socialLists')
grouped = toGroupedText groupIt (Set.size . snd) fst snd
(Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
socialLists <- flowSocialList user nt (Set.fromList $ Map.keys ngs) let
groupParams = GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-}
groupedWithList = toGroupedText groupParams socialLists' ngs'
printDebug "groupedWithList" (Map.map (\v -> (view gt_label v, view gt_children v)) $ Map.filter (\v -> (Set.size $ view gt_children v) > 0) groupedWithList)
let let
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList (stopTerms, tailTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(mapTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms (mapTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
listSize = mapListSize - (List.length mapTerms) listSize = mapListSize - (List.length mapTerms)
(mapTerms', candiTerms) = List.splitAt listSize $ List.sortOn (Down . _gt_score) $ Map.elems tailTerms' (mapTerms', candiTerms) = List.splitAt listSize
$ List.sortOn (Down . _gt_score)
pure $ Map.fromList [( nt, (List.concat $ map toNgramsElement stopTerms) $ Map.elems tailTerms'
<> (List.concat $ map toNgramsElement mapTerms)
<> (List.concat $ map toNgramsElement $ map (set gt_listType (Just MapTerm)) mapTerms') pure $ Map.fromList [( nt, (List.concat $ map toNgramsElement stopTerms)
<> (List.concat $ map toNgramsElement $ map (set gt_listType (Just CandidateTerm)) candiTerms) <> (List.concat $ map toNgramsElement mapTerms )
<> (List.concat $ map toNgramsElement
$ map (set gt_listType (Just MapTerm )) mapTerms' )
<> (List.concat $ map toNgramsElement
$ map (set gt_listType (Just CandidateTerm)) candiTerms)
)] )]
-- TODO use ListIds -- TODO use ListIds
buildNgramsTermsList :: ( HasNodeError err buildNgramsTermsList :: ( HasNodeError err
, CmdM env err m , CmdM env err m
...@@ -116,18 +130,17 @@ buildNgramsTermsList :: ( HasNodeError err ...@@ -116,18 +130,17 @@ buildNgramsTermsList :: ( HasNodeError err
buildNgramsTermsList user uCid mCid groupParams = do buildNgramsTermsList user uCid mCid groupParams = do
-- Computing global speGen score -- Computing global speGen score
allTerms <- Map.toList <$> getTficf uCid mCid NgramsTerms allTerms :: Map Text Double <- getTficf uCid mCid NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms) -- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms) -- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms -- First remove stops terms
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms) socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst $ Map.toList allTerms)
-- printDebug "\n * socialLists * \n" socialLists -- printDebug "\n * socialLists * \n" socialLists
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = toGroupedText (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) allTerms let grouped = groupedTextWithStem ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped groupedWithList = map (addListType (invertForw socialLists)) grouped
...@@ -137,7 +150,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -137,7 +150,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * stopTerms * \n" stopTerms -- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates -- splitting monterms and multiterms to take proportional candidates
let let
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if to small listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if too small
monoSize = 0.4 :: Double monoSize = 0.4 :: Double
multSize = 1 - monoSize multSize = 1 - monoSize
...@@ -172,12 +185,13 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -172,12 +185,13 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ groupedMonoHead <> groupedMultHead $ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId -- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k contextsAdded = foldl' (\mapGroups' k ->
in case Map.lookup k' mapGroups' of let k' = ngramsGroup groupParams k in
Nothing -> mapGroups' case Map.lookup k' mapGroups' of
Just g -> case Map.lookup k mapTextDocIds of Nothing -> mapGroups'
Nothing -> mapGroups' Just g -> case Map.lookup k mapTextDocIds of
Just ns -> Map.insert k' ( g { _gt_nodes = Set.union ns (_gt_nodes g)}) mapGroups' Nothing -> mapGroups'
Just ns -> Map.insert k' ( g { _gt_nodes = Set.union ns (_gt_nodes g)}) mapGroups'
) )
mapGroups mapGroups
$ Map.keys mapTextDocIds $ Map.keys mapTextDocIds
...@@ -187,6 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -187,6 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2) $ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds | (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds , (t2, s2) <- mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
] ]
where where
mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded
...@@ -253,7 +268,6 @@ buildNgramsTermsList user uCid mCid groupParams = do ...@@ -253,7 +268,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "monoScoredInclHead" monoScoredInclHead -- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail -- printDebug "monoScoredExclHead" monoScoredExclTail
--
-- printDebug "multScoredInclHead" multScoredInclHead -- printDebug "multScoredInclHead" multScoredInclHead
-- printDebug "multScoredExclTail" multScoredExclTail -- printDebug "multScoredExclTail" multScoredExclTail
......
{-| {-|
Module : Gargantext.Core.Text.List.CSV Module : Gargantext.Core.Text.List.Formats.CSV
Description : Description :
Copyright : (c) CNRS, 2018-Present Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -12,29 +12,24 @@ CSV parser for Gargantext corpus files. ...@@ -12,29 +12,24 @@ CSV parser for Gargantext corpus files.
-} -}
module Gargantext.Core.Text.List.CSV where module Gargantext.Core.Text.List.Formats.CSV where
import GHC.IO (FilePath)
import Control.Applicative import Control.Applicative
import Control.Monad (mzero) import Control.Monad (mzero)
import Data.Char (ord) import Data.Char (ord)
import Data.Csv import Data.Csv
import Data.Either (Either(Left, Right)) import Data.Either (Either(Left, Right))
import Data.List (null) import Data.List (null)
import Data.Text (Text, pack) import Data.Text (Text, pack)
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V import GHC.IO (FilePath)
import Gargantext.Prelude hiding (length)
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as DT
import qualified Data.Vector as V
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList csvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
......
{-|
Module : Gargantext.Core.Text.List.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group
where
import Control.Lens (set)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..))
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
------------------------------------------------------------------------
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | To be removed
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
{-|
Module : Gargantext.Core.Text.List.WithScores
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List.Group.WithScores
where
import Control.Lens (makeLenses, over, view)
import Data.Semigroup
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
------------------------------------------------------------------------
-- | Main Types
data GroupedWithListScores =
GroupedWithListScores { _gwls_children :: !(Set Text)
, _gwls_listType :: !(Maybe ListType)
}
makeLenses ''GroupedWithListScores
instance Semigroup GroupedWithListScores where
(<>) (GroupedWithListScores c1 l1)
(GroupedWithListScores c2 l2) =
GroupedWithListScores (c1 <> c2) (l1 <> l2)
------
data GroupedTextScores score =
GroupedTextScores { _gts_listType :: !(Maybe ListType)
, _gts_score :: score
, _gts_children :: !(Set Text)
}
makeLenses 'GroupedTextScores
instance Semigroup a => Semigroup (GroupedTextScores a) where
(<>) (GroupedTextScores l1 s1 c1)
(GroupedTextScores l2 s2 c2)
= GroupedTextScores (l1 <> l2) (s1 <> s2) (c1 <> c2)
------------------------------------------------------------------------
-- | Main function
groupWithScores :: Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
groupWithScores scores ms = foldl' (addScore scores) start (Map.toList ms)
where
start = fromGroupedScores $ fromListScores scores
-- | Add scores depending on being either parent or child or orphan
addScore :: Map Text FlowListScores
-> Map Text (GroupedTextScores (Set NodeId))
-> (Text, Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
addScore scores ms (t, ns) = Map.alter (isParent ns) t ms
where
-- is parent case
isParent ns' (Just (GroupedTextScores l s c)) = let ns'' = ns' <> s in Just (GroupedTextScores l ns'' c)
-- is either child or orphan case
isParent ns' Nothing = case Map.lookup t scores of
-- is child case
Just fls -> case keyWithMaxValue $ view fls_parents fls of
Just parent -> over gts_score (<> ns') <$> Map.lookup parent ms
Nothing -> panic "Should not happen"
-- is Orphan case
Nothing -> Just $ GroupedTextScores Nothing ns' Set.empty
------------------------------------------------------------------------
fromGroupedScores :: Map Parent GroupedWithListScores
-> Map Parent (GroupedTextScores (Set NodeId))
fromGroupedScores = Map.map (\(GroupedWithListScores c l) -> GroupedTextScores l Set.empty c)
------------------------------------------------------------------------
fromListScores :: Map Text FlowListScores -> Map Parent GroupedWithListScores
fromListScores = Map.fromListWith (<>) . (map fromScores') . Map.toList
where
fromScores' :: (Text, FlowListScores) -> (Text, GroupedWithListScores)
fromScores' (t, fs) = case (keyWithMaxValue $ view fls_parents fs) of
Nothing -> (t, GroupedWithListScores Set.empty (keyWithMaxValue $ view fls_listType fs))
-- Parent case: taking its listType, for now children Set is empty
Just parent -> (parent, GroupedWithListScores (Set.singleton t) Nothing)
-- We ignore the ListType of children for the parents' one
-- added after and winner of semigroup actions
{-| {-|
Module : Gargantext.Core.Text.Group Module : Gargantext.Core.Text.List.Group.WithStem
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -9,20 +9,24 @@ Portability : POSIX ...@@ -9,20 +9,24 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.Group module Gargantext.Core.Text.List.Group.WithStem
where where
import Control.Lens (makeLenses, set) import Control.Lens (makeLenses, view)
import Data.Set (Set) import Data.Set (Set)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Semigroup (Semigroup)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..)) import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -30,22 +34,7 @@ import qualified Data.Map as Map ...@@ -30,22 +34,7 @@ import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text import qualified Data.Text as Text
{- -- | Main Types
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
data StopSize = StopSize {unStopSize :: !Int} data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be -- | TODO: group with 2 terms only can be
...@@ -59,75 +48,27 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -59,75 +48,27 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
} }
| GroupIdentity | GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
------------------------------------------------------------------------ data GroupedTextParams a b =
toGroupedText :: Ord b GroupedTextParams { _gt_fun_stem :: Text -> Text
=> (Text -> Text ) , _gt_fun_score :: a -> b
-> (a -> b ) , _gt_fun_texts :: a -> Set Text
-> (a -> Set Text ) , _gt_fun_nodeIds :: a -> Set NodeId
-> (a -> Set NodeId) -- , _gt_fun_size :: a -> Int
-> [(Text,a)] }
-> Map Stem (GroupedText b) makeLenses 'GroupedTextParams
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
where
group (t,d) = let t' = fun_stem t
in (t', GroupedText
Nothing
t
(fun_score d)
(fun_texts d)
(size t)
t'
(fun_nodeIds d)
)
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text type Stem = Text
type Label = Text
data GroupedText score = data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType) GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label , _gt_label :: !Text
, _gt_score :: !score , _gt_score :: !score
, _gt_children :: !(Set Text) , _gt_children :: !(Set Text)
, _gt_size :: !Int , _gt_size :: !Int
, _gt_stem :: !Stem , _gt_stem :: !Stem -- needed ?
, _gt_nodes :: !(Set NodeId) , _gt_nodes :: !(Set NodeId)
} {-deriving Show--} } deriving Show --}
--{- {-
instance Show score => Show (GroupedText score) where instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
--} --}
...@@ -140,16 +81,69 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where ...@@ -140,16 +81,69 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _) compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2 (GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- Lenses Instances instance Ord a => Semigroup (GroupedText a) where
(<>) (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
-- | Lenses Instances
makeLenses 'GroupedText makeLenses 'GroupedText
------------------------------------------------------------------------ ------------------------------------------------------------------------
addListType :: Map Text ListType -> GroupedText a -> GroupedText a groupWithStem :: {- ( HasNgrams a
addListType m g = set gt_listType (hasListType m g) g , HasGroupWithScores a b
where , Semigroup a
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType , Ord b
hasListType m' (GroupedText _ label _ g' _ _ _) = )
List.foldl' (<>) Nothing => -} GroupedTextParams a b
$ map (\t -> Map.lookup t m') -> Map Text (GroupedTextScores (Set NodeId))
$ Set.toList -> Map Stem (GroupedText Int)
$ Set.insert label g' groupWithStem _ = Map.mapWithKey scores2groupedText
scores2groupedText :: Text -> GroupedTextScores (Set NodeId) -> GroupedText Int
scores2groupedText t g = GroupedText (view gts_listType g)
t
(Set.size $ view gts_score g)
(Set.delete t $ view gts_children g)
(size t)
t
(view gts_score g)
------------------------------------------------------------------------
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
-> Map Text a
-> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
where
group gparams' (t,d) = let t' = (view gt_fun_stem gparams') t
in (t', GroupedText
Nothing
t
((view gt_fun_score gparams') d)
((view gt_fun_texts gparams') d)
(size t)
t'
((view gt_fun_nodeIds gparams') d)
)
------------------------------------------------------------------------
...@@ -11,27 +11,24 @@ Portability : POSIX ...@@ -11,27 +11,24 @@ Portability : POSIX
module Gargantext.Core.Text.List.Social module Gargantext.Core.Text.List.Social
where where
-- findList imports
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Prelude
-- filterList imports
import Data.Maybe (fromMaybe)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set) import Data.Maybe (fromMaybe)
import Data.Semigroup (Semigroup(..)) import Data.Semigroup (Semigroup(..))
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.API.Ngrams.Tools -- (getListNgrams) import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Find
import Gargantext.Core.Text.List.Social.ListType
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import qualified Data.List as List import Gargantext.Prelude
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -44,71 +41,104 @@ flowSocialList :: ( RepoCmdM env err m ...@@ -44,71 +41,104 @@ flowSocialList :: ( RepoCmdM env err m
=> User -> NgramsType -> Set Text => User -> NgramsType -> Set Text
-> m (Map ListType (Set Text)) -> m (Map ListType (Set Text))
flowSocialList user nt ngrams' = do flowSocialList user nt ngrams' = do
privateLists <- flowSocialListByMode Private user nt ngrams' -- Here preference to privateLists (discutable: let user choice)
privateListIds <- findListsId user Private
privateLists <- flowSocialListByMode privateListIds nt ngrams'
-- printDebug "* privateLists *: \n" privateLists -- printDebug "* privateLists *: \n" privateLists
-- here preference to privateLists (discutable)
sharedLists <- flowSocialListByMode Shared user nt (termsByList CandidateTerm privateLists) sharedListIds <- findListsId user Shared
sharedLists <- flowSocialListByMode sharedListIds nt (termsByList CandidateTerm privateLists)
-- printDebug "* sharedLists *: \n" sharedLists -- printDebug "* sharedLists *: \n" sharedLists
-- TODO publicMapList
let result = unions [ Map.mapKeys (fromMaybe CandidateTerm) privateLists -- TODO publicMapList:
, Map.mapKeys (fromMaybe CandidateTerm) sharedLists -- Note: if both produce 3 identic repetition => refactor mode
] -- publicListIds <- findListsId Public user
-- publicLists <- flowSocialListByMode' publicListIds nt (termsByList CandidateTerm privateLists)
let result = parentUnionsExcl
[ Map.mapKeys (fromMaybe CandidateTerm) privateLists
, Map.mapKeys (fromMaybe CandidateTerm) sharedLists
-- , Map.mapKeys (fromMaybe CandidateTerm) publicLists
]
-- printDebug "* socialLists *: results \n" result -- printDebug "* socialLists *: results \n" result
pure result pure result
------------------------------------------------------------------------ ------------------------------------------------------------------------
unions :: (Ord a, Semigroup a, Semigroup b, Ord b) ------------------------------------------------------------------------
=> [Map a (Set b)] -> Map a (Set b) -- | FlowSocialListPriority
unions = invertBack . Map.unionsWith (<>) . map invertForw -- Sociological assumption: either private or others (public) first
-- This parameter depends on the user choice
invertForw :: (Ord b, Semigroup a) => Map a (Set b) -> Map b a data FlowSocialListPriority = MySelfFirst | OthersFirst
invertForw = Map.unionsWith (<>)
. (map (\(k,sets) -> Map.fromSet (\_ -> k) sets))
. Map.toList
invertBack :: (Ord a, Ord b) => Map b a -> Map a (Set b)
invertBack = Map.fromListWith (<>)
. (map (\(b,a) -> (a, Set.singleton b)))
. Map.toList
unions_test :: Map ListType (Set Text) flowSocialListPriority :: FlowSocialListPriority -> [NodeMode]
unions_test = unions [m1, m2] flowSocialListPriority MySelfFirst = [Private, Shared{-, Public -}]
where flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst
m1 = Map.fromList [ (StopTerm , Set.singleton "Candidate")]
m2 = Map.fromList [ (CandidateTerm, Set.singleton "Candidate")
, (MapTerm , Set.singleton "Candidate")
]
------------------------------------------------------------------------ ------------------------------------------------------------------------
termsByList :: ListType -> (Map (Maybe ListType) (Set Text)) -> Set Text flowSocialList' :: ( RepoCmdM env err m
termsByList CandidateTerm m = Set.unions , CmdM env err m
$ map (\lt -> fromMaybe Set.empty $ Map.lookup lt m) , HasNodeError err
[ Nothing, Just CandidateTerm ] , HasTreeError err
termsByList l m = )
fromMaybe Set.empty $ Map.lookup (Just l) m => FlowSocialListPriority
-> User -> NgramsType -> Set Text
-> m (Map Text FlowListScores)
flowSocialList' flowPriority user nt ngrams' =
parentUnionsExcl <$> mapM (flowSocialListByMode' user nt ngrams')
(flowSocialListPriority flowPriority)
------------------------------------------------------------------------
flowSocialListByMode :: ( RepoCmdM env err m flowSocialListByMode :: ( RepoCmdM env err m
, CmdM env err m , CmdM env err m
, HasNodeError err , HasNodeError err
, HasTreeError err , HasTreeError err
) )
=> NodeMode -> User -> NgramsType -> Set Text => [NodeId]-> NgramsType -> Set Text
-> m (Map (Maybe ListType) (Set Text)) -> m (Map (Maybe ListType) (Set Text))
flowSocialListByMode mode user nt ngrams' = do flowSocialListByMode [] _nt ngrams' = pure $ Map.fromList [(Nothing, ngrams')]
listIds <- findListsId mode user flowSocialListByMode listIds nt ngrams' = do
case listIds of counts <- countFilterList ngrams' nt listIds Map.empty
[] -> pure $ Map.fromList [(Nothing, ngrams')] let r = toSocialList counts ngrams'
_ -> do pure r
counts <- countFilterList ngrams' nt listIds Map.empty
-- printDebug "flowSocialListByMode counts" counts
let r = toSocialList counts ngrams' flowSocialListByMode' :: ( RepoCmdM env err m
-- printDebug "flowSocialListByMode r" r , CmdM env err m
pure r , HasNodeError err
, HasTreeError err
)
=> User -> NgramsType -> Set Text -> NodeMode
-> m (Map Text FlowListScores)
flowSocialListByMode' user nt st mode =
findListsId user mode
>>= flowSocialListByModeWith nt st
flowSocialListByModeWith :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> NgramsType -> Set Text -> [NodeId]
-> m (Map Text FlowListScores)
flowSocialListByModeWith nt st ns =
mapM (\l -> getListNgrams [l] nt) ns
>>= pure
. toFlowListScores (keepAllParents nt) st Map.empty
-- | We keep the parents for all ngrams but terms
keepAllParents :: NgramsType -> KeepAllParents
keepAllParents NgramsTerms = KeepAllParents False
keepAllParents _ = KeepAllParents True
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO: maybe use social groups too -- TODO: maybe use social groups too
-- | TODO what if equality ?
-- choice depends on Ord instance of ListType
-- for now : data ListType = StopTerm | CandidateTerm | MapTerm
-- means MapTerm > CandidateTerm > StopTerm in case of equality of counts
-- (we minimize errors on MapTerms if doubt)
toSocialList :: Map Text (Map ListType Int) toSocialList :: Map Text (Map ListType Int)
-> Set Text -> Set Text
-> Map (Maybe ListType) (Set Text) -> Map (Maybe ListType) (Set Text)
...@@ -116,11 +146,6 @@ toSocialList m = Map.fromListWith (<>) ...@@ -116,11 +146,6 @@ toSocialList m = Map.fromListWith (<>)
. Set.toList . Set.toList
. Set.map (toSocialList1 m) . Set.map (toSocialList1 m)
-- | TODO what if equality ?
-- choice depends on Ord instance of ListType
-- for now : data ListType = StopTerm | CandidateTerm | MapTerm
-- means MapTerm > CandidateTerm > StopTerm in case of equality of counts
-- (we minimize errors on MapTerms if doubt)
toSocialList1 :: Map Text (Map ListType Int) toSocialList1 :: Map Text (Map ListType Int)
-> Text -> Text
-> (Maybe ListType, Set Text) -> (Maybe ListType, Set Text)
...@@ -141,159 +166,34 @@ toSocialList1_testIsTrue = result == (Just MapTerm, Set.singleton token) ...@@ -141,159 +166,34 @@ toSocialList1_testIsTrue = result == (Just MapTerm, Set.singleton token)
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists -- | Tools
-- here we need UserList only
countFilterList :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList st nt ls input =
foldM' (\m l -> countFilterList' st nt [l] m) input ls
countFilterList' :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList' st nt ls input = do
ml <- toMapTextListType <$> getListNgrams ls nt
-- printDebug "countFilterList'" ml
pure $ Set.foldl' (\m t -> countList t ml m) input st
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent termsByList :: ListType -> (Map (Maybe ListType) (Set Text)) -> Set Text
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType termsByList CandidateTerm m = Set.unions
toMapTextListType m = Map.fromListWith (<>) $ map (\lt -> fromMaybe Set.empty $ Map.lookup lt m)
$ List.concat [ Nothing, Just CandidateTerm ]
$ map (toList m) termsByList l m =
$ Map.toList m fromMaybe Set.empty $ Map.lookup (Just l) m
----------------------
-- | Tools to inherit groupings
----------------------
type Parent = Text
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> (fst . fst) <$> Map.maxViewWithKey m'
toMapTextParent :: Set Text
-> Map Text (Map Parent Int)
-> [Map Text NgramsRepoElement]
-> Map Text (Map Parent Int)
toMapTextParent ts = foldl' (toMapTextParent' ts)
where
toMapTextParent' :: Set Text
-> Map Text (Map Parent Int)
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
toMapTextParent' ts' to from = Set.foldl' (toMapTextParent'' ts' from) to ts'
toMapTextParent'' :: Set Text
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
-> Text
-> Map Text (Map Parent Int)
toMapTextParent'' ss from to t = case Map.lookup t from of
Nothing -> to
Just nre -> case _nre_parent nre of
Just (NgramsTerm p') -> if Set.member p' ss
then Map.alter (addParent p') t to
else to
where
addParent p'' Nothing = Just $ addCountParent p'' Map.empty
addParent p'' (Just ps) = Just $ addCountParent p'' ps
addCountParent :: Parent -> Map Parent Int -> Map Parent Int
addCountParent p m = Map.alter addCount p m
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
_ -> to
------------------------------------------------------------------------ ------------------------------------------------------------------------
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)] unions :: (Ord a, Semigroup a, Semigroup b, Ord b)
toList m (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) = => [Map a (Set b)] -> Map a (Set b)
List.zip terms (List.cycle [lt']) unions = invertBack . Map.unionsWith (<>) . map invertForw
where
terms = [t]
-- <> maybe [] (\n -> [unNgramsTerm n]) root
-- <> maybe [] (\n -> [unNgramsTerm n]) parent
<> (map unNgramsTerm $ Map.keys children)
lt' = listOf m nre
listOf :: Map Text NgramsRepoElement -> NgramsRepoElement -> ListType invertForw :: (Ord b, Semigroup a) => Map a (Set b) -> Map b a
listOf m ng = case _nre_parent ng of invertForw = Map.unionsWith (<>)
Nothing -> _nre_list ng . (map (\(k,sets) -> Map.fromSet (\_ -> k) sets))
Just p -> case Map.lookup (unNgramsTerm p) m of . Map.toList
Just ng' -> listOf m ng'
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
------------------------------------------------------------------------ invertBack :: (Ord a, Ord b) => Map b a -> Map a (Set b)
countList :: Text invertBack = Map.fromListWith (<>)
-> Map Text ListType . (map (\(b,a) -> (a, Set.singleton b)))
-> Map Text (Map ListType Int) . Map.toList
-> Map Text (Map ListType Int)
countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCountList :: ListType -> Map ListType Int -> Map ListType Int unions_test :: Map ListType (Set Text)
addCountList l m = Map.alter (plus l) l m unions_test = unions [m1, m2]
where where
plus CandidateTerm Nothing = Just 1 m1 = Map.fromList [ (StopTerm , Set.singleton "Candidate")]
plus CandidateTerm (Just x) = Just $ x + 1 m2 = Map.fromList [ (CandidateTerm, Set.singleton "Candidate")
, (MapTerm , Set.singleton "Candidate")
plus _ Nothing = Just 3 ]
plus _ (Just x) = Just $ x + 3
------------------------------------------------------------------------
findListsId :: (HasNodeError err, HasTreeError err)
=> NodeMode -> User -> Cmd err [NodeId]
findListsId mode u = do
r <- getRootId u
ns <- map _dt_nodeId <$> filter (\n -> _dt_typeId n == nodeTypeId NodeList)
<$> findNodes' mode r
-- printDebug "findListsIds" ns
pure ns
findNodes' :: HasTreeError err
=> NodeMode -> RootId
-> Cmd err [DbTreeNode]
findNodes' Private r = findNodes Private r $ [NodeFolderPrivate] <> commonNodes
findNodes' Shared r = findNodes Shared r $ [NodeFolderShared ] <> commonNodes
findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
{-|
Module : Gargantext.Core.Text.List.Social.Find
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Find
where
-- findList imports
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Prelude
------------------------------------------------------------------------
findListsId :: (HasNodeError err, HasTreeError err)
=> User -> NodeMode -> Cmd err [NodeId]
findListsId u mode = do
r <- getRootId u
ns <- map _dt_nodeId <$> filter (\n -> _dt_typeId n == nodeTypeId NodeList)
<$> findNodes' mode r
pure ns
findNodes' :: HasTreeError err
=> NodeMode -> RootId
-> Cmd err [DbTreeNode]
findNodes' Private r = findNodes Private r $ [NodeFolderPrivate] <> commonNodes
findNodes' Shared r = findNodes Shared r $ [NodeFolderShared ] <> commonNodes
findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
{-|
Module : Gargantext.Core.Text.List.Social.ListType
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.ListType
where
import Gargantext.Database.Admin.Types.Node
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Schema.Ngrams
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists
-- here we need UserList only
countFilterList :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList st nt ls input =
foldM' (\m l -> countFilterList' st nt [l] m) input ls
where
countFilterList' :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList' st' nt' ls' input' = do
ml <- toMapTextListType <$> getListNgrams ls' nt'
pure $ Set.foldl' (\m t -> countList t ml m) input' st'
------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType
toMapTextListType m = Map.fromListWith (<>)
$ List.concat
$ map (toList m)
$ Map.toList m
where
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)]
toList m' (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) =
List.zip terms (List.cycle [lt'])
where
terms = [t]
-- <> maybe [] (\n -> [unNgramsTerm n]) root
-- <> maybe [] (\n -> [unNgramsTerm n]) parent
<> (map unNgramsTerm $ Map.keys children)
lt' = listOf m' nre
listOf :: Map Text NgramsRepoElement -> NgramsRepoElement -> ListType
listOf m'' ng = case _nre_parent ng of
Nothing -> _nre_list ng
Just p -> case Map.lookup (unNgramsTerm p) m'' of
Just ng' -> listOf m'' ng'
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
------------------------------------------------------------------------
countList :: Text
-> Map Text ListType
-> Map Text (Map ListType Int)
-> Map Text (Map ListType Int)
countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCountList :: ListType -> Map ListType Int -> Map ListType Int
addCountList l' m' = Map.alter (plus l') l' m'
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
{-|
Module : Gargantext.Core.Text.List.Social.Scores
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Scores
where
import Control.Lens
import Data.Map (Map)
import Data.Semigroup (Semigroup(..))
import Data.Set (Set)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | Tools to inherit groupings
------------------------------------------------------------------------
-- | Tools
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
------------------------------------------------------------------------
type Parent = Text
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> keyWithMaxValue m'
------------------------------------------------------------------------
keyWithMaxValue :: Map a b -> Maybe a
keyWithMaxValue m = (fst . fst) <$> Map.maxViewWithKey m
------------------------------------------------------------------------
data FlowListScores =
FlowListScores { _fls_parents :: Map Parent Int
, _fls_listType :: Map ListType Int
-- You can add any score by incrementing this type
-- , _flc_score :: Map Score Int
}
deriving (Show, Generic)
makeLenses ''FlowListScores
instance Semigroup FlowListScores where
(<>) (FlowListScores p1 l1) (FlowListScores p2 l2) =
FlowListScores (p1 <> p2) (l1 <> l2)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | toFlowListScores which generate Score from list of Map Text
-- NgramsRepoElement
toFlowListScores :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> [Map Text NgramsRepoElement]
-> Map Text FlowListScores
toFlowListScores k ts = foldl' (toFlowListScores' k ts)
where
toFlowListScores' :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
toFlowListScores' k' ts' to' ngramsRepo =
Set.foldl' (toFlowListScores'' k' ts' ngramsRepo) to' ts'
toFlowListScores'' :: KeepAllParents
-> Set Text
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
-> Text
-> Map Text FlowListScores
toFlowListScores'' k'' ss ngramsRepo to'' t =
case Map.lookup t ngramsRepo of
Nothing -> to''
Just nre -> Map.alter (addParent k'' nre ss) t
$ Map.alter (addList $ _nre_list nre) t to''
------------------------------------------------------------------------
-- | Main addFunctions to groupResolution the FlowListScores
-- Use patch-map library here
-- diff, transformWith patches simplifies functions below
addList :: ListType
-> Maybe FlowListScores
-> Maybe FlowListScores
addList l Nothing =
Just $ FlowListScores Map.empty (addList' l Map.empty)
addList l (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent mapList'
where
mapList' = addList' l mapList
-- * Unseful but nice comment:
-- "the addList function looks like an ASCII bird"
-- | Concrete function to pass to PatchMap
addList' :: ListType -> Map ListType Int -> Map ListType Int
addList' l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
------------------------------------------------------------------------
------------------------------------------------------------------------
data KeepAllParents = KeepAllParents Bool
addParent :: KeepAllParents -> NgramsRepoElement -> Set Text
-> Maybe FlowListScores
-> Maybe FlowListScores
addParent k nre ss Nothing =
Just $ FlowListScores mapParent Map.empty
where
mapParent = addParent' k (_nre_parent nre) ss Map.empty
addParent k nre ss (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent' mapList
where
mapParent' = addParent' k (_nre_parent nre) ss mapParent
addParent' :: Num a
=> KeepAllParents
-> Maybe NgramsTerm
-> Set Text
-> Map Text a
-> Map Text a
addParent' _ Nothing _ss mapParent = mapParent
addParent' (KeepAllParents k) (Just (NgramsTerm p')) ss mapParent =
case k of
True -> Map.alter addCount p' mapParent
False -> case Set.member p' ss of
False -> mapParent
True -> Map.alter addCount p' mapParent
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
------------------------------------------------------------------------
...@@ -50,7 +50,7 @@ instance ToSchema NodeTree where ...@@ -50,7 +50,7 @@ instance ToSchema NodeTree where
type TypeId = Int type TypeId = Int
-- TODO multiple ListType declaration, remove it -- TODO multiple ListType declaration, remove it
-- data ListType = CandidateTerm | StopTerm | MapTerm -- data ListType = CandidateTerm | StopTerm | MapTerm
data ListType = StopTerm | CandidateTerm | MapTerm data ListType = CandidateTerm | StopTerm | MapTerm
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded) deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded)
instance ToJSON ListType instance ToJSON ListType
...@@ -81,7 +81,11 @@ listTypeId CandidateTerm = 1 ...@@ -81,7 +81,11 @@ listTypeId CandidateTerm = 1
listTypeId MapTerm = 2 listTypeId MapTerm = 2
fromListTypeId :: ListTypeId -> Maybe ListType fromListTypeId :: ListTypeId -> Maybe ListType
fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..maxBound]] fromListTypeId i = lookup i
$ fromList
[ (listTypeId l, l)
| l <- [StopTerm, CandidateTerm, MapTerm]
]
-- data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue -- data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal -- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
......
...@@ -64,9 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName) ...@@ -64,9 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile) import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment