Commit 907c4bab authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] Group fun and types

parent c738e89d
{-| {-|
Module : Gargantext.Core.Text.Types Module : Gargantext.Core.Text.Group
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -11,7 +11,7 @@ Portability : POSIX ...@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Types module Gargantext.Core.Text.Group
where where
import Control.Lens (makeLenses, set) import Control.Lens (makeLenses, set)
...@@ -21,10 +21,49 @@ import Data.Text (Text) ...@@ -21,10 +21,49 @@ import Data.Text (Text)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..)) import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang
-> Int
-> Int
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text type Group = Lang -> Int -> Int -> Text -> Text
......
...@@ -31,13 +31,12 @@ import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(. ...@@ -31,13 +31,12 @@ import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(.
import Gargantext.API.Ngrams.Types (RepoCmdM) import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw) import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Types import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Prelude (Cmd, CmdM) import Gargantext.Database.Prelude (Cmd, CmdM)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultList)
...@@ -47,23 +46,6 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..)) ...@@ -47,23 +46,6 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !Int
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO improve grouping functions of Authors, Sources, Institutes.. -- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: ( RepoCmdM env err m buildNgramsLists :: ( RepoCmdM env err m
, CmdM env err m , CmdM env err m
......
...@@ -65,7 +65,8 @@ import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile) ...@@ -65,7 +65,8 @@ import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists,StopSize(..)) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
......
...@@ -36,24 +36,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) ...@@ -36,24 +36,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang
-> Int
-> Int
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment