Commit 8f1c001b authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-social-list' into dev

parents 82f3efed 62bd8d8c
......@@ -28,7 +28,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
......
......@@ -19,19 +19,16 @@ module Main where
import Data.ByteString.Lazy (writeFile)
import Data.Maybe (catMaybes)
import Data.Text (pack)
import qualified Data.Text as DT
import Data.Tuple.Extra (both)
import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
import Control.Monad (zipWithM)
import Control.Monad.IO.Class
import Data.Map (Map)
import qualified Data.IntMap as DIM
import qualified Data.Map as DM
import GHC.Generics
......@@ -44,7 +41,6 @@ import System.IO (hPutStr, hFlush, stderr)
import System.Environment
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Concurrent (getNumCapabilities, myThreadId, threadCapability)
import Prelude ((>>))
import Gargantext.Prelude
import Gargantext.Core
......@@ -53,7 +49,7 @@ import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......
......@@ -31,7 +31,7 @@ import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker
......
......@@ -73,7 +73,7 @@ library:
- Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Examples
- Gargantext.Core.Text.List.CSV
- Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
......
......@@ -9,16 +9,18 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List
where
import Control.Lens ((^.), set)
import Control.Lens ((^.), set, view)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Ord (Down(..))
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import qualified Data.Char as Char
import qualified Data.List as List
......@@ -29,12 +31,15 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.List.Social (flowSocialList, flowSocialList', FlowSocialListPriority(..), invertForw)
import Gargantext.Core.Text.List.Social.Scores -- (FlowListScores)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Node (defaultList)
......@@ -66,7 +71,7 @@ buildNgramsLists user gp uCid mCid = do
pure $ Map.unions $ [ngTerms] <> othersTerms
data MapListSize = MapListSize Int
data MapListSize = MapListSize { unMapListSize :: !Int }
buildNgramsOthersList ::( HasNodeError err
, CmdM env err m
......@@ -79,29 +84,38 @@ buildNgramsOthersList ::( HasNodeError err
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
ngs' :: Map Text (Set NodeId) <- getNodesByNgramsUser uCid nt
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
socialLists' :: Map Text FlowListScores
<- flowSocialList' MySelfFirst user nt (Set.fromList $ Map.keys ngs')
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
printDebug "flowSocialList'" (Map.filter (not . ((==) Map.empty) . view fls_parents) socialLists')
let
grouped = toGroupedText groupIt (Set.size . snd) fst snd
(Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
groupParams = GroupedTextParams groupIt (Set.size . snd) fst snd {-(size . fst)-}
groupedWithList = toGroupedText groupParams socialLists' ngs'
socialLists <- flowSocialList user nt (Set.fromList $ Map.keys ngs)
printDebug "groupedWithList" (Map.map (\v -> (view gt_label v, view gt_children v)) $ Map.filter (\v -> (Set.size $ view gt_children v) > 0) groupedWithList)
let
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(mapTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
listSize = mapListSize - (List.length mapTerms)
(mapTerms', candiTerms) = List.splitAt listSize $ List.sortOn (Down . _gt_score) $ Map.elems tailTerms'
(mapTerms', candiTerms) = List.splitAt listSize
$ List.sortOn (Down . _gt_score)
$ Map.elems tailTerms'
pure $ Map.fromList [( nt, (List.concat $ map toNgramsElement stopTerms)
<> (List.concat $ map toNgramsElement mapTerms)
<> (List.concat $ map toNgramsElement $ map (set gt_listType (Just MapTerm)) mapTerms')
<> (List.concat $ map toNgramsElement $ map (set gt_listType (Just CandidateTerm)) candiTerms)
<> (List.concat $ map toNgramsElement mapTerms )
<> (List.concat $ map toNgramsElement
$ map (set gt_listType (Just MapTerm )) mapTerms' )
<> (List.concat $ map toNgramsElement
$ map (set gt_listType (Just CandidateTerm)) candiTerms)
)]
-- TODO use ListIds
buildNgramsTermsList :: ( HasNodeError err
, CmdM env err m
......@@ -116,18 +130,17 @@ buildNgramsTermsList :: ( HasNodeError err
buildNgramsTermsList user uCid mCid groupParams = do
-- Computing global speGen score
allTerms <- Map.toList <$> getTficf uCid mCid NgramsTerms
allTerms :: Map Text Double <- getTficf uCid mCid NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst allTerms)
socialLists <- flowSocialList user NgramsTerms (Set.fromList $ map fst $ Map.toList allTerms)
-- printDebug "\n * socialLists * \n" socialLists
-- Grouping the ngrams and keeping the maximum score for label
let grouped = toGroupedText (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) allTerms
let grouped = groupedTextWithStem ( GroupedTextParams (ngramsGroup groupParams) identity (const Set.empty) (const Set.empty) {-(size . _gt_label)-} ) allTerms
groupedWithList = map (addListType (invertForw socialLists)) grouped
......@@ -137,7 +150,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
let
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if to small
listSizeGlobal = 2000 :: Double -- use % of list if to big, or Int if too small
monoSize = 0.4 :: Double
multSize = 1 - monoSize
......@@ -172,8 +185,9 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k
in case Map.lookup k' mapGroups' of
contextsAdded = foldl' (\mapGroups' k ->
let k' = ngramsGroup groupParams k in
case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
Nothing -> mapGroups'
......@@ -187,6 +201,7 @@ buildNgramsTermsList user uCid mCid groupParams = do
$ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
]
where
mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded
......@@ -253,7 +268,6 @@ buildNgramsTermsList user uCid mCid groupParams = do
-- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail
--
-- printDebug "multScoredInclHead" multScoredInclHead
-- printDebug "multScoredExclTail" multScoredExclTail
......
{-|
Module : Gargantext.Core.Text.List.CSV
Module : Gargantext.Core.Text.List.Formats.CSV
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
......@@ -12,29 +12,24 @@ CSV parser for Gargantext corpus files.
-}
module Gargantext.Core.Text.List.CSV where
import GHC.IO (FilePath)
module Gargantext.Core.Text.List.Formats.CSV where
import Control.Applicative
import Control.Monad (mzero)
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.List (null)
import Data.Text (Text, pack)
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude hiding (length)
import GHC.IO (FilePath)
import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as DT
import qualified Data.Vector as V
------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
......
{-|
Module : Gargantext.Core.Text.List.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group
where
import Control.Lens (set)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..))
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
------------------------------------------------------------------------
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | To be removed
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
{-|
Module : Gargantext.Core.Text.List.WithScores
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List.Group.WithScores
where
import Control.Lens (makeLenses, over, view)
import Data.Semigroup
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
------------------------------------------------------------------------
-- | Main Types
data GroupedWithListScores =
GroupedWithListScores { _gwls_children :: !(Set Text)
, _gwls_listType :: !(Maybe ListType)
}
makeLenses ''GroupedWithListScores
instance Semigroup GroupedWithListScores where
(<>) (GroupedWithListScores c1 l1)
(GroupedWithListScores c2 l2) =
GroupedWithListScores (c1 <> c2) (l1 <> l2)
------
data GroupedTextScores score =
GroupedTextScores { _gts_listType :: !(Maybe ListType)
, _gts_score :: score
, _gts_children :: !(Set Text)
}
makeLenses 'GroupedTextScores
instance Semigroup a => Semigroup (GroupedTextScores a) where
(<>) (GroupedTextScores l1 s1 c1)
(GroupedTextScores l2 s2 c2)
= GroupedTextScores (l1 <> l2) (s1 <> s2) (c1 <> c2)
------------------------------------------------------------------------
-- | Main function
groupWithScores :: Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
groupWithScores scores ms = foldl' (addScore scores) start (Map.toList ms)
where
start = fromGroupedScores $ fromListScores scores
-- | Add scores depending on being either parent or child or orphan
addScore :: Map Text FlowListScores
-> Map Text (GroupedTextScores (Set NodeId))
-> (Text, Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
addScore scores ms (t, ns) = Map.alter (isParent ns) t ms
where
-- is parent case
isParent ns' (Just (GroupedTextScores l s c)) = let ns'' = ns' <> s in Just (GroupedTextScores l ns'' c)
-- is either child or orphan case
isParent ns' Nothing = case Map.lookup t scores of
-- is child case
Just fls -> case keyWithMaxValue $ view fls_parents fls of
Just parent -> over gts_score (<> ns') <$> Map.lookup parent ms
Nothing -> panic "Should not happen"
-- is Orphan case
Nothing -> Just $ GroupedTextScores Nothing ns' Set.empty
------------------------------------------------------------------------
fromGroupedScores :: Map Parent GroupedWithListScores
-> Map Parent (GroupedTextScores (Set NodeId))
fromGroupedScores = Map.map (\(GroupedWithListScores c l) -> GroupedTextScores l Set.empty c)
------------------------------------------------------------------------
fromListScores :: Map Text FlowListScores -> Map Parent GroupedWithListScores
fromListScores = Map.fromListWith (<>) . (map fromScores') . Map.toList
where
fromScores' :: (Text, FlowListScores) -> (Text, GroupedWithListScores)
fromScores' (t, fs) = case (keyWithMaxValue $ view fls_parents fs) of
Nothing -> (t, GroupedWithListScores Set.empty (keyWithMaxValue $ view fls_listType fs))
-- Parent case: taking its listType, for now children Set is empty
Just parent -> (parent, GroupedWithListScores (Set.singleton t) Nothing)
-- We ignore the ListType of children for the parents' one
-- added after and winner of semigroup actions
{-|
Module : Gargantext.Core.Text.Group
Module : Gargantext.Core.Text.List.Group.WithStem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -10,19 +10,23 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.Group
module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (makeLenses, set)
import Control.Lens (makeLenses, view)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Data.Semigroup (Semigroup)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -30,22 +34,7 @@ import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
{-
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
......@@ -59,75 +48,27 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
}
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
------------------------------------------------------------------------
toGroupedText :: Ord b
=> (Text -> Text )
-> (a -> b )
-> (a -> Set Text )
-> (a -> Set NodeId)
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
where
group (t,d) = let t' = fun_stem t
in (t', GroupedText
Nothing
t
(fun_score d)
(fun_texts d)
(size t)
t'
(fun_nodeIds d)
)
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems'
groupStems' :: Ord a => [(Stem, GroupedText a)] -> Map Stem (GroupedText a)
groupStems' = Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
}
makeLenses 'GroupedTextParams
------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label
, _gt_label :: !Text
, _gt_score :: !score
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem
, _gt_stem :: !Stem -- needed ?
, _gt_nodes :: !(Set NodeId)
} {-deriving Show--}
--{-
} deriving Show --}
{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
--}
......@@ -140,16 +81,69 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- Lenses Instances
instance Ord a => Semigroup (GroupedText a) where
(<>) (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
-- | Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b
, Semigroup a
, Ord b
)
=> -} GroupedTextParams a b
-> Map Text (GroupedTextScores (Set NodeId))
-> Map Stem (GroupedText Int)
groupWithStem _ = Map.mapWithKey scores2groupedText
scores2groupedText :: Text -> GroupedTextScores (Set NodeId) -> GroupedText Int
scores2groupedText t g = GroupedText (view gts_listType g)
t
(Set.size $ view gts_score g)
(Set.delete t $ view gts_children g)
(size t)
t
(view gts_score g)
------------------------------------------------------------------------
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
-> Map Text a
-> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
group gparams' (t,d) = let t' = (view gt_fun_stem gparams') t
in (t', GroupedText
Nothing
t
((view gt_fun_score gparams') d)
((view gt_fun_texts gparams') d)
(size t)
t'
((view gt_fun_nodeIds gparams') d)
)
------------------------------------------------------------------------
This diff is collapsed.
{-|
Module : Gargantext.Core.Text.List.Social.Find
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Find
where
-- findList imports
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Prelude
------------------------------------------------------------------------
findListsId :: (HasNodeError err, HasTreeError err)
=> User -> NodeMode -> Cmd err [NodeId]
findListsId u mode = do
r <- getRootId u
ns <- map _dt_nodeId <$> filter (\n -> _dt_typeId n == nodeTypeId NodeList)
<$> findNodes' mode r
pure ns
findNodes' :: HasTreeError err
=> NodeMode -> RootId
-> Cmd err [DbTreeNode]
findNodes' Private r = findNodes Private r $ [NodeFolderPrivate] <> commonNodes
findNodes' Shared r = findNodes Shared r $ [NodeFolderShared ] <> commonNodes
findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
{-|
Module : Gargantext.Core.Text.List.Social.ListType
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.ListType
where
import Gargantext.Database.Admin.Types.Node
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Schema.Ngrams
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists
-- here we need UserList only
countFilterList :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList st nt ls input =
foldM' (\m l -> countFilterList' st nt [l] m) input ls
where
countFilterList' :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList' st' nt' ls' input' = do
ml <- toMapTextListType <$> getListNgrams ls' nt'
pure $ Set.foldl' (\m t -> countList t ml m) input' st'
------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType
toMapTextListType m = Map.fromListWith (<>)
$ List.concat
$ map (toList m)
$ Map.toList m
where
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)]
toList m' (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) =
List.zip terms (List.cycle [lt'])
where
terms = [t]
-- <> maybe [] (\n -> [unNgramsTerm n]) root
-- <> maybe [] (\n -> [unNgramsTerm n]) parent
<> (map unNgramsTerm $ Map.keys children)
lt' = listOf m' nre
listOf :: Map Text NgramsRepoElement -> NgramsRepoElement -> ListType
listOf m'' ng = case _nre_parent ng of
Nothing -> _nre_list ng
Just p -> case Map.lookup (unNgramsTerm p) m'' of
Just ng' -> listOf m'' ng'
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
------------------------------------------------------------------------
countList :: Text
-> Map Text ListType
-> Map Text (Map ListType Int)
-> Map Text (Map ListType Int)
countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCountList :: ListType -> Map ListType Int -> Map ListType Int
addCountList l' m' = Map.alter (plus l') l' m'
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
{-|
Module : Gargantext.Core.Text.List.Social.Scores
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Scores
where
import Control.Lens
import Data.Map (Map)
import Data.Semigroup (Semigroup(..))
import Data.Set (Set)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | Tools to inherit groupings
------------------------------------------------------------------------
-- | Tools
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
------------------------------------------------------------------------
type Parent = Text
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> keyWithMaxValue m'
------------------------------------------------------------------------
keyWithMaxValue :: Map a b -> Maybe a
keyWithMaxValue m = (fst . fst) <$> Map.maxViewWithKey m
------------------------------------------------------------------------
data FlowListScores =
FlowListScores { _fls_parents :: Map Parent Int
, _fls_listType :: Map ListType Int
-- You can add any score by incrementing this type
-- , _flc_score :: Map Score Int
}
deriving (Show, Generic)
makeLenses ''FlowListScores
instance Semigroup FlowListScores where
(<>) (FlowListScores p1 l1) (FlowListScores p2 l2) =
FlowListScores (p1 <> p2) (l1 <> l2)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | toFlowListScores which generate Score from list of Map Text
-- NgramsRepoElement
toFlowListScores :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> [Map Text NgramsRepoElement]
-> Map Text FlowListScores
toFlowListScores k ts = foldl' (toFlowListScores' k ts)
where
toFlowListScores' :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
toFlowListScores' k' ts' to' ngramsRepo =
Set.foldl' (toFlowListScores'' k' ts' ngramsRepo) to' ts'
toFlowListScores'' :: KeepAllParents
-> Set Text
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
-> Text
-> Map Text FlowListScores
toFlowListScores'' k'' ss ngramsRepo to'' t =
case Map.lookup t ngramsRepo of
Nothing -> to''
Just nre -> Map.alter (addParent k'' nre ss) t
$ Map.alter (addList $ _nre_list nre) t to''
------------------------------------------------------------------------
-- | Main addFunctions to groupResolution the FlowListScores
-- Use patch-map library here
-- diff, transformWith patches simplifies functions below
addList :: ListType
-> Maybe FlowListScores
-> Maybe FlowListScores
addList l Nothing =
Just $ FlowListScores Map.empty (addList' l Map.empty)
addList l (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent mapList'
where
mapList' = addList' l mapList
-- * Unseful but nice comment:
-- "the addList function looks like an ASCII bird"
-- | Concrete function to pass to PatchMap
addList' :: ListType -> Map ListType Int -> Map ListType Int
addList' l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
------------------------------------------------------------------------
------------------------------------------------------------------------
data KeepAllParents = KeepAllParents Bool
addParent :: KeepAllParents -> NgramsRepoElement -> Set Text
-> Maybe FlowListScores
-> Maybe FlowListScores
addParent k nre ss Nothing =
Just $ FlowListScores mapParent Map.empty
where
mapParent = addParent' k (_nre_parent nre) ss Map.empty
addParent k nre ss (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent' mapList
where
mapParent' = addParent' k (_nre_parent nre) ss mapParent
addParent' :: Num a
=> KeepAllParents
-> Maybe NgramsTerm
-> Set Text
-> Map Text a
-> Map Text a
addParent' _ Nothing _ss mapParent = mapParent
addParent' (KeepAllParents k) (Just (NgramsTerm p')) ss mapParent =
case k of
True -> Map.alter addCount p' mapParent
False -> case Set.member p' ss of
False -> mapParent
True -> Map.alter addCount p' mapParent
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
------------------------------------------------------------------------
......@@ -50,7 +50,7 @@ instance ToSchema NodeTree where
type TypeId = Int
-- TODO multiple ListType declaration, remove it
-- data ListType = CandidateTerm | StopTerm | MapTerm
data ListType = StopTerm | CandidateTerm | MapTerm
data ListType = CandidateTerm | StopTerm | MapTerm
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded)
instance ToJSON ListType
......@@ -81,7 +81,11 @@ listTypeId CandidateTerm = 1
listTypeId MapTerm = 2
fromListTypeId :: ListTypeId -> Maybe ListType
fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..maxBound]]
fromListTypeId i = lookup i
$ fromList
[ (listTypeId l, l)
| l <- [StopTerm, CandidateTerm, MapTerm]
]
-- data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
......
......@@ -64,9 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment