[REFACT] Group fun and types

907c4bab · Alexandre Delanoë · c738e89d · 907c4bab · 907c4bab · 907c4bab
Commit 907c4bab authored Oct 15, 2020 by Alexandre Delanoë
4 changed files
--- a/src/Gargantext/Core/Text/Types.hs
+++ b/src/Gargantext/Core/Text/Types.hs
 {-|
-Module      : Gargantext.Core.Text.Types
+Module      : Gargantext.Core.Text.Group
 Description : 
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
@@ -11,7 +11,7 @@ Portability : POSIX
 {-# LANGUAGE TemplateHaskell   #-}
-module Gargantext.Core.Text.Types
+module Gargantext.Core.Text.Group
  where
 import Control.Lens (makeLenses, set)
@@ -21,10 +21,49 @@ import Data.Text (Text)
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Types (ListType(..))
 import Gargantext.Database.Admin.Types.Node (NodeId)
+import Gargantext.Core.Text.List.Learn (Model(..))
+import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
+import Gargantext.Core.Text.Terms.Mono.Stem (stem)
 import Gargantext.Prelude
-import qualified Data.Set as Set
+import qualified Data.Set  as Set
-import qualified Data.Map as Map
+import qualified Data.Map  as Map
 import qualified Data.List as List
+import qualified Data.Text as Text
+-- | Sum type with four record constructors (presumably one per way of
+-- building an ngrams list — TODO confirm against the callers).
+-- Every field is strict.
+--
+-- * 'BuilderStepO' (the suffix is the capital letter O, not a zero)
+--   carries two 'Int' settings and a 'StopSize'.
+-- * 'BuilderStep1' and 'BuilderStepN' each carry a 'Model' and share the
+--   @withModel@ selector.
+-- * 'Tficf' carries a 'Lang', two 'Int' grouping settings, a 'StopSize',
+--   a 'UserCorpusId' and a 'MasterCorpusId'.
+--
+-- NOTE(review): @stemSize@ / @stemX@ and @nlb_group1@ / @nlb_group2@
+-- presumably feed the two 'Int' arguments of a grouping function — verify.
+--
+-- The record selectors exist only on some constructors, so they are
+-- partial: e.g. applying @withModel@ to a 'Tficf' value fails at runtime.
+data NgramsListBuilder = BuilderStepO { stemSize :: !Int
+                                      , stemX    :: !Int
+                                      , stopSize :: !StopSize
+                                      }
+                       | BuilderStep1 { withModel :: !Model }
+                       | BuilderStepN { withModel :: !Model }
+                       | Tficf { nlb_lang           :: !Lang
+                               , nlb_group1         :: !Int
+                               , nlb_group2         :: !Int
+                               , nlb_stopSize       :: !StopSize
+                               , nlb_userCorpusId   :: !UserCorpusId
+                               , nlb_masterCorpusId :: !MasterCorpusId
+                               }
+-- | Wrapper around a single strict 'Int'; @unStopSize@ extracts it.
+-- It is the type of the @stopSize@ and @nlb_stopSize@ fields of
+-- 'NgramsListBuilder'.
+-- NOTE(review): the unit/meaning of the number is not visible here — confirm.
+data StopSize = StopSize {unStopSize :: !Int}
+-- | Turn an ngram into a grouping key. Reading the pipeline bottom-up:
+-- every @"-"@ is replaced by a space, the text is split on single spaces,
+-- the resulting tokens are sorted, each token is stemmed with @stem l@,
+-- and the stems are joined back with single spaces.
+--
+-- The two 'Int' arguments are currently ignored (they are bound as @_m@
+-- and @_n@); the commented-out steps in the body show where they were
+-- meant to be used (a minimum token length and a maximum token count).
+--
+-- TODO: grouping with 2 terms only can be
+-- discussed. The main purpose of this is to offer
+-- a first grouping option to the user and to get some
+-- enriched data to better learn and improve that algorithm.
+ngramsGroup :: Lang
+            -> Int
+            -> Int
+            -> Text
+            -> Text
+ngramsGroup l _m _n = Text.intercalate " "
+                  . map (stem l)
+                  -- . take n
+                  . List.sort
+                  -- . (List.filter (\t -> Text.length t > m))
+                  . Text.splitOn " "
+                  . Text.replace "-" " "
 ------------------------------------------------------------------------------
 -- | Type synonym for a grouping function: a 'Lang', two 'Int's and a 'Text' in, a 'Text' out.
 type Group = Lang -> Int -> Int -> Text -> Text

--- a/src/Gargantext/Core/Text/List.hs
+++ b/src/Gargantext/Core/Text/List.hs
@@ -31,13 +31,12 @@ import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(.
 import Gargantext.API.Ngrams.Types (RepoCmdM)
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Text (size)
-import Gargantext.Core.Text.List.Learn (Model(..))
 import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
 import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
-import Gargantext.Core.Text.Types
+import Gargantext.Core.Text.Group
 import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
 import Gargantext.Core.Types.Individu (User(..))
-import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
+import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
 import Gargantext.Database.Action.Metrics.TFICF (getTficf)
 import Gargantext.Database.Prelude (Cmd, CmdM)
 import Gargantext.Database.Query.Table.Node (defaultList)
@@ -47,23 +46,6 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
 import Gargantext.Prelude
-data NgramsListBuilder = BuilderStepO { stemSize :: !Int
-                                      , stemX    :: !Int
-                                      , stopSize :: !Int
-                                      }
-                       | BuilderStep1 { withModel :: !Model }
-                       | BuilderStepN { withModel :: !Model }
-                       | Tficf { nlb_lang           :: !Lang
-                               , nlb_group1         :: !Int
-                               , nlb_group2         :: !Int
-                               , nlb_stopSize       :: !StopSize
-                               , nlb_userCorpusId   :: !UserCorpusId
-                               , nlb_masterCorpusId :: !MasterCorpusId
-                               }
-data StopSize = StopSize {unStopSize :: !Int}
 -- | TODO improve grouping functions of Authors, Sources, Institutes..
 buildNgramsLists :: ( RepoCmdM env err m
                    , CmdM     env err m

--- a/src/Gargantext/Database/Action/Flow.hs
+++ b/src/Gargantext/Database/Action/Flow.hs
@@ -65,7 +65,8 @@ import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
 import Gargantext.Core.Flow.Types
 import Gargantext.Core.Text
 import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
-import Gargantext.Core.Text.List (buildNgramsLists,StopSize(..))
+import Gargantext.Core.Text.List (buildNgramsLists)
+import Gargantext.Core.Text.Group (StopSize(..))
 import Gargantext.Core.Text.Terms
 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
 import Gargantext.Core.Types (Terms(..))

--- a/src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+++ b/src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
@@ -36,24 +36,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
 import Gargantext.Database.Prelude (Cmd, runPGSQuery)
 import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
 import Gargantext.Prelude
-import Gargantext.Core.Text.Terms.Mono.Stem (stem)
--- | TODO: group with 2 terms only can be
--- discussed. Main purpose of this is offering
--- a first grouping option to user and get some
--- enriched data to better learn and improve that algo
-ngramsGroup :: Lang
-            -> Int
-            -> Int
-            -> Text
-            -> Text
-ngramsGroup l _m _n = Text.intercalate " "
-                  . map (stem l)
-                  -- . take n
-                  . List.sort
-                  -- . (List.filter (\t -> Text.length t > m))
-                  . Text.splitOn " "
-                  . Text.replace "-" " "