Commit 3fa450cd authored by Alexandre Delanoë's avatar Alexandre Delanoë

[NgramsGroup] refactor parameters

parent 907c4bab
...@@ -51,13 +51,16 @@ data StopSize = StopSize {unStopSize :: !Int} ...@@ -51,13 +51,16 @@ data StopSize = StopSize {unStopSize :: !Int}
-- a first grouping option to user and get some -- a first grouping option to user and get some
-- enriched data to better learn and improve that algo -- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
ngramsGroup :: Lang ngramsGroup :: GroupParams
-> Int
-> Int
-> Text -> Text
-> Text -> Text
ngramsGroup l _m _n = Text.intercalate " " ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. map (stem l) . map (stem l)
-- . take n -- . take n
. List.sort . List.sort
......
...@@ -29,7 +29,6 @@ import qualified Data.Text as Text ...@@ -29,7 +29,6 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList) import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM) import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
...@@ -53,15 +52,12 @@ buildNgramsLists :: ( RepoCmdM env err m ...@@ -53,15 +52,12 @@ buildNgramsLists :: ( RepoCmdM env err m
, HasNodeError err , HasNodeError err
) )
=> User => User
-> Lang -> GroupParams
-> Int
-> Int
-> StopSize
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsLists user l n m s uCid mCid = do buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user l n m s uCid mCid ngTerms <- buildNgramsTermsList user gp uCid mCid
othersTerms <- mapM (buildNgramsOthersList user uCid identity) othersTerms <- mapM (buildNgramsOthersList user uCid identity)
[Authors, Sources, Institutes] [Authors, Sources, Institutes]
pure $ Map.unions $ othersTerms <> [ngTerms] pure $ Map.unions $ othersTerms <> [ngTerms]
...@@ -105,14 +101,11 @@ buildNgramsTermsList :: ( HasNodeError err ...@@ -105,14 +101,11 @@ buildNgramsTermsList :: ( HasNodeError err
, HasTreeError err , HasTreeError err
) )
=> User => User
-> Lang -> GroupParams
-> Int
-> Int
-> StopSize
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user l n m _s uCid mCid = do buildNgramsTermsList user groupParams uCid mCid = do
-- Computing global speGen score -- Computing global speGen score
allTerms <- Map.toList <$> getTficf uCid mCid NgramsTerms allTerms <- Map.toList <$> getTficf uCid mCid NgramsTerms
...@@ -137,7 +130,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do ...@@ -137,7 +130,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do
-- Grouping the ngrams and keeping the maximum score for label -- Grouping the ngrams and keeping the maximum score for label
let grouped = groupStems' let grouped = groupStems'
$ map (\(t,d) -> let stem = ngramsGroup l n m t $ map (\(t,d) -> let stem = ngramsGroup groupParams t
in ( stem in ( stem
, GroupedText Nothing t d Set.empty (size t) stem Set.empty , GroupedText Nothing t d Set.empty (size t) stem Set.empty
) )
...@@ -186,7 +179,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do ...@@ -186,7 +179,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do
$ groupedMonoHead <> groupedMultHead $ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId -- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup l n m k contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k
in case Map.lookup k' mapGroups' of in case Map.lookup k' mapGroups' of
Nothing -> mapGroups' Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of Just g -> case Map.lookup k mapTextDocIds of
......
...@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types ...@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..)) import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
...@@ -221,7 +221,7 @@ flowCorpusUser l user corpusName ctype ids = do ...@@ -221,7 +221,7 @@ flowCorpusUser l user corpusName ctype ids = do
-- User List Flow -- User List Flow
(masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype (masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
ngs <- buildNgramsLists user l 2 3 (StopSize 3) userCorpusId masterCorpusId ngs <- buildNgramsLists user (GroupParams l 2 3 (StopSize 3)) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs _userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId _mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId -- _ <- insertOccsUpdates userCorpusId mastListId
......
...@@ -16,6 +16,7 @@ Ngrams by node enable contextual metrics. ...@@ -16,6 +16,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByNode module Gargantext.Database.Action.Metrics.NgramsByNode
where where
import Data.Map.Strict (Map, fromListWith, elems, toList) import Data.Map.Strict (Map, fromListWith, elems, toList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff) import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set) import Data.Set (Set)
...@@ -24,18 +25,14 @@ import Data.Tuple.Extra (second, swap) ...@@ -24,18 +25,14 @@ import Data.Tuple.Extra (second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace) import Debug.Trace (trace)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Simple as DPS
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Config (nodeTypeId) import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment