Commit 3fa450cd authored by Alexandre Delanoë

[NgramsGroup] refact parameters

parent 907c4bab
Pipeline #1153 failed with stage
......@@ -51,13 +51,16 @@ data StopSize = StopSize {unStopSize :: !Int}
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
ngramsGroup :: Lang
-> Int
-> Int
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
......
......@@ -29,7 +29,6 @@ import qualified Data.Text as Text
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
......@@ -53,15 +52,12 @@ buildNgramsLists :: ( RepoCmdM env err m
, HasNodeError err
)
=> User
-> Lang
-> Int
-> Int
-> StopSize
-> GroupParams
-> UserCorpusId
-> MasterCorpusId
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user l n m s uCid mCid = do
ngTerms <- buildNgramsTermsList user l n m s uCid mCid
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user gp uCid mCid
othersTerms <- mapM (buildNgramsOthersList user uCid identity)
[Authors, Sources, Institutes]
pure $ Map.unions $ othersTerms <> [ngTerms]
......@@ -105,14 +101,11 @@ buildNgramsTermsList :: ( HasNodeError err
, HasTreeError err
)
=> User
-> Lang
-> Int
-> Int
-> StopSize
-> GroupParams
-> UserCorpusId
-> MasterCorpusId
-> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user l n m _s uCid mCid = do
buildNgramsTermsList user groupParams uCid mCid = do
-- Computing global speGen score
allTerms <- Map.toList <$> getTficf uCid mCid NgramsTerms
......@@ -137,7 +130,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do
-- Grouping the ngrams and keeping the maximum score for label
let grouped = groupStems'
$ map (\(t,d) -> let stem = ngramsGroup l n m t
$ map (\(t,d) -> let stem = ngramsGroup groupParams t
in ( stem
, GroupedText Nothing t d Set.empty (size t) stem Set.empty
)
......@@ -186,7 +179,7 @@ buildNgramsTermsList user l n m _s uCid mCid = do
$ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup l n m k
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup groupParams k
in case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
......
......@@ -66,7 +66,7 @@ import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..))
import Gargantext.Core.Text.Group (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
......@@ -221,7 +221,7 @@ flowCorpusUser l user corpusName ctype ids = do
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
ngs <- buildNgramsLists user l 2 3 (StopSize 3) userCorpusId masterCorpusId
ngs <- buildNgramsLists user (GroupParams l 2 3 (StopSize 3)) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
......
......@@ -16,6 +16,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByNode
where
import Data.Map.Strict (Map, fromListWith, elems, toList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set)
......@@ -24,18 +25,14 @@ import Data.Tuple.Extra (second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Simple as DPS
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment