Commit 907c4bab authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] Group fun and types

parent c738e89d
{-|
Module : Gargantext.Core.Text.Types
Module : Gargantext.Core.Text.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Types
module Gargantext.Core.Text.Group
where
import Control.Lens (makeLenses, set)
......@@ -21,10 +21,49 @@ import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang
-> Int
-> Int
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
......
......@@ -31,13 +31,12 @@ import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(.
import Gargantext.API.Ngrams.Types (RepoCmdM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social (flowSocialList, invertForw)
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Core.Text.Types
import Gargantext.Core.Text.Group
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Prelude (Cmd, CmdM)
import Gargantext.Database.Query.Table.Node (defaultList)
......@@ -47,23 +46,6 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !Int
}
| BuilderStep1 { withModel :: !Model }
| BuilderStepN { withModel :: !Model }
| Tficf { nlb_lang :: !Lang
, nlb_group1 :: !Int
, nlb_group2 :: !Int
, nlb_stopSize :: !StopSize
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: ( RepoCmdM env err m
, CmdM env err m
......
......@@ -65,7 +65,8 @@ import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Group (StopSize(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
......
......@@ -36,24 +36,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang
-> Int
-> Int
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment