diff --git a/bin/gargantext-phylo/Main.hs b/bin/gargantext-phylo/Main.hs index fe44127b52dfa8f23064c09ac7f8f897d8c99477..0cc59fce0f5f8af643802daaf1fbc11c18dcbae5 100644 --- a/bin/gargantext-phylo/Main.hs +++ b/bin/gargantext-phylo/Main.hs @@ -34,6 +34,7 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight) import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Viz.Phylo @@ -42,7 +43,6 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink) import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude hiding (hash, replace) import Prelude qualified import System.Directory (listDirectory,doesFileExist) diff --git a/bin/gargantext-phylo/Phylo/Common.hs b/bin/gargantext-phylo/Phylo/Common.hs index 14651012eefedb6e63abbce4b24fc0af5eb31084..6750bd0fac266950163a587588d2921248e1e9e1 100644 --- a/bin/gargantext-phylo/Phylo/Common.hs +++ b/bin/gargantext-phylo/Phylo/Common.hs @@ -18,13 +18,13 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight) import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo.API.Tools import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude hiding (hash, replace) import Prelude qualified import System.Directory (listDirectory) diff --git a/gargantext.cabal b/gargantext.cabal index 6e2948427c1b49304a61ed5453bb36eb3ddc53fa..9f542a6cf8a98d63164dabcf6535ca77ab22415b 100644 --- a/gargantext.cabal +++ b/gargantext.cabal @@ -100,9 +100,9 @@ library Gargantext.API Gargantext.API.Admin.Auth.Types Gargantext.API.Admin.EnvTypes + Gargantext.API.Admin.Orchestrator.Types Gargantext.API.Admin.Settings Gargantext.API.Admin.Settings.CORS - Gargantext.API.Admin.Orchestrator.Types Gargantext.API.Admin.Types Gargantext.API.Auth.PolicyCheck Gargantext.API.Dev @@ -141,18 +141,21 @@ library Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API.Arxiv Gargantext.Core.Text.Corpus.API.EPO - Gargantext.Core.Text.Corpus.API.Pubmed Gargantext.Core.Text.Corpus.API.OpenAlex - Gargantext.Core.Text.Corpus.Query + Gargantext.Core.Text.Corpus.API.Pubmed Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.Date Gargantext.Core.Text.Corpus.Parsers.Date.Parsec + Gargantext.Core.Text.Corpus.Query + Gargantext.Core.Text.List + Gargantext.Core.Text.List.Group.WithStem Gargantext.Core.Text.List.Formats.CSV Gargantext.Core.Text.Metrics Gargantext.Core.Text.Metrics.CharByChar Gargantext.Core.Text.Metrics.Count Gargantext.Core.Text.Metrics.TFICF + Gargantext.Core.Text.Ngrams Gargantext.Core.Text.Prepare Gargantext.Core.Text.Search Gargantext.Core.Text.Terms @@ -170,8 +173,8 @@ library Gargantext.Core.Types Gargantext.Core.Types.Individu Gargantext.Core.Types.Main - Gargantext.Core.Types.Query Gargantext.Core.Types.Phylo + Gargantext.Core.Types.Query Gargantext.Core.Utils Gargantext.Core.Utils.Prefix Gargantext.Core.Viz.Graph @@ -189,6 +192,7 @@ library Gargantext.Core.Viz.Types Gargantext.Database.Action.Flow Gargantext.Database.Action.Flow.Types + Gargantext.Database.Action.Metrics.TFICF Gargantext.Database.Action.Search Gargantext.Database.Action.User Gargantext.Database.Action.User.New @@ -205,8 +209,8 @@ library Gargantext.Database.Query.Table.Node Gargantext.Database.Query.Table.Node.Error Gargantext.Database.Query.Table.Node.UpdateOpaleye - Gargantext.Database.Query.Tree.Root Gargantext.Database.Query.Table.User + Gargantext.Database.Query.Tree.Root Gargantext.Database.Schema.Ngrams Gargantext.Database.Schema.Node Gargantext.Database.Schema.User @@ -309,11 +313,9 @@ library Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler Gargantext.Core.Text.Corpus.Parsers.Wikimedia Gargantext.Core.Text.Learn - Gargantext.Core.Text.List Gargantext.Core.Text.List.Group Gargantext.Core.Text.List.Group.Prelude Gargantext.Core.Text.List.Group.WithScores - Gargantext.Core.Text.List.Group.WithStem Gargantext.Core.Text.List.Learn Gargantext.Core.Text.List.Merge Gargantext.Core.Text.List.Social @@ -365,7 +367,6 @@ library Gargantext.Database.Action.Metrics Gargantext.Database.Action.Metrics.Lists Gargantext.Database.Action.Metrics.NgramsByContext - Gargantext.Database.Action.Metrics.TFICF Gargantext.Database.Action.Node Gargantext.Database.Action.Share Gargantext.Database.Action.TSQuery diff --git a/src/Gargantext/API/Ngrams.hs b/src/Gargantext/API/Ngrams.hs index 9db6401e39a176907b79ddaef39b72eeb08cc06c..cb36c86742c1411d398de07ad1b8f29c3bfc6212 100644 --- a/src/Gargantext/API/Ngrams.hs +++ b/src/Gargantext/API/Ngrams.hs @@ -19,11 +19,10 @@ add get {-# OPTIONS_GHC -fno-warn-deprecations #-} {-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeFamilies #-} - -{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE TypeOperators #-} module Gargantext.API.Ngrams @@ -106,13 +105,13 @@ import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.API.Ngrams.Types import Gargantext.API.Prelude ( GargM ) import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion) +import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Types.Node (NodeType(..)) -import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, Ngrams, insertNgrams, selectNgramsByDoc ) -import Gargantext.Database.Schema.Ngrams qualified as TableNgrams +import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, insertNgrams, selectNgramsByDoc ) import Gargantext.Database.Query.Table.Node (getNode) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername ) @@ -179,7 +178,7 @@ listTypeConflictResolution :: ListType -> ListType -> ListType listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType -ngramsStatePatchConflictResolution :: TableNgrams.NgramsType +ngramsStatePatchConflictResolution :: NgramsType -> NgramsTerm -> ConflictResolutionNgramsPatch ngramsStatePatchConflictResolution _ngramsType _ngramsTerm @@ -234,7 +233,7 @@ addListNgrams listId ngramsType nes = do -- UNSAFE setListNgrams :: HasNodeStory env err m => NodeId - -> TableNgrams.NgramsType + -> NgramsType -> Map NgramsTerm NgramsRepoElement -> m () setListNgrams listId ngramsType ns = do @@ -357,7 +356,7 @@ commitStatePatch listId (Versioned _p_version p) = do -- This is a special case of tableNgramsPut where the input patch is empty. tableNgramsPull :: HasNodeStory env err m => ListId - -> TableNgrams.NgramsType + -> NgramsType -> Version -> m (Versioned NgramsTablePatch) tableNgramsPull listId ngramsType p_version = do @@ -487,7 +486,7 @@ tableNgramsPostChartsAsync utn jobHandle = do getNgramsTableMap :: HasNodeStory env err m => NodeId - -> TableNgrams.NgramsType + -> NgramsType -> m (Versioned NgramsTableMap) getNgramsTableMap nodeId ngramsType = do a <- getNodeStory nodeId @@ -498,7 +497,7 @@ getNgramsTableMap nodeId ngramsType = do dumpJsonTableMap :: HasNodeStory env err m => Text -> NodeId - -> TableNgrams.NgramsType + -> NgramsType -> m () dumpJsonTableMap fpath nodeId ngramsType = do m <- getNgramsTableMap nodeId ngramsType @@ -617,7 +616,7 @@ getNgramsTable' :: forall env err m. , HasNodeError err ) => NodeId -> ListId - -> TableNgrams.NgramsType + -> NgramsType -> m (Versioned (Map.Map NgramsTerm NgramsElement)) getNgramsTable' nId listId ngramsType = do tableMap <- getNgramsTableMap listId ngramsType @@ -631,7 +630,7 @@ setNgramsTableScores :: forall env err m t. , HasNodeError err ) => NodeId -> ListId - -> TableNgrams.NgramsType + -> NgramsType -> t -> m t setNgramsTableScores nId listId ngramsType table = do @@ -821,7 +820,7 @@ apiNgramsAsync _dId = -- * listNgramsChangedSince: good precision, good bandwidth, bad computation. -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation. listNgramsChangedSince :: HasNodeStory env err m - => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool) + => ListId -> NgramsType -> Version -> m (Versioned Bool) listNgramsChangedSince listId ngramsType version | version < 0 = Versioned <$> currentVersion listId <*> pure True diff --git a/src/Gargantext/API/Ngrams/List.hs b/src/Gargantext/API/Ngrams/List.hs index 6ccaf390e6cfe2dd77e294ec24d1e15a46361a68..c443798a0ccf6d06557f00e8f82ae71732e73db3 100644 --- a/src/Gargantext/API/Ngrams/List.hs +++ b/src/Gargantext/API/Ngrams/List.hs @@ -10,7 +10,6 @@ Portability : POSIX -} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE ViewPatterns #-} @@ -29,21 +28,21 @@ import Data.Text (concat, pack, splitOn) import Data.Vector (Vector) import Data.Vector qualified as Vec import Gargantext.API.Admin.EnvTypes (Env, GargJob(..)) -import Gargantext.API.Admin.Orchestrator.Types -import Gargantext.API.Errors.Types +import Gargantext.API.Admin.Orchestrator.Types ( AsyncJobs, JobLog ) +import Gargantext.API.Errors.Types (BackendInternalError) import Gargantext.API.Ngrams (setListNgrams) import Gargantext.API.Ngrams.List.Types import Gargantext.API.Ngrams.Prelude (getNgramsList) import Gargantext.API.Ngrams.Types import Gargantext.API.Prelude (GargServer, GargM, serverError, HasServerError) -import Gargantext.API.Types -import Gargantext.Core.NodeStory +import Gargantext.API.Types (HTML) +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) +import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms)) import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Database.Action.Flow (reIndexWith) -import Gargantext.Database.Admin.Types.Node -import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams +import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId ) import Gargantext.Database.Query.Table.Node (getNode) -import Gargantext.Database.Schema.Ngrams +import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId ) import Gargantext.Database.Schema.Node (_node_parent_id) import Gargantext.Database.Types (Indexed(..)) import Gargantext.Prelude hiding (concat, toList) @@ -113,7 +112,7 @@ getCsv :: HasNodeStory env err m -> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap) getCsv lId = do lst <- getNgramsList lId - pure $ case Map.lookup TableNgrams.NgramsTerms lst of + pure $ case Map.lookup NgramsTerms lst of Nothing -> noHeader Map.empty Just (Versioned { _v_data }) -> addHeader (concat [ "attachment; filename=GarganText_NgramsList-" diff --git a/src/Gargantext/API/Ngrams/Prelude.hs b/src/Gargantext/API/Ngrams/Prelude.hs index 8759967f02a298235ca2bd30f15e8d092088e38d..67cc4ff2e3a901024a47a1ee8da4f0b43cfcae2a 100644 --- a/src/Gargantext/API/Ngrams/Prelude.hs +++ b/src/Gargantext/API/Ngrams/Prelude.hs @@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap ) +import Gargantext.Core.Text.Ngrams (NgramsType, ngramsTypes) import Gargantext.Core.Types.Main ( ListType ) import Gargantext.Database.Admin.Types.Node (ListId) -import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes) import Gargantext.Prelude @@ -61,7 +61,7 @@ toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl where toTerm = Text.splitOn " " . unNgramsTerm - (roots, children) = List.partition (\(_t, nre) -> view nre_root nre == Nothing) + (roots, children) = List.partition (\(_t, nre) -> isNothing (view nre_root nre)) $ List.filter (\(_t,nre) -> view nre_list nre == lt'') ns roots' = map (\(t,nre) -> (t, map toTerm $ unMSet $ view nre_children nre )) roots diff --git a/src/Gargantext/API/Ngrams/Tools.hs b/src/Gargantext/API/Ngrams/Tools.hs index 790a303d2b02aacce0a1d3ebf413db83793c353f..e8abefa0452fa71c0948e0b9632906dd258b0653 100644 --- a/src/Gargantext/API/Ngrams/Tools.hs +++ b/src/Gargantext/API/Ngrams/Tools.hs @@ -12,7 +12,6 @@ Portability : POSIX {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# OPTIONS_GHC -Wno-unrecognised-pragmas #-} -{-# HLINT ignore "Use infix" #-} module Gargantext.API.Ngrams.Tools where @@ -24,11 +23,11 @@ import Data.HashMap.Strict qualified as HM import Data.Map.Strict qualified as Map import Data.Set qualified as Set -- import GHC.Conc (TVar, readTVar) -import Gargantext.API.Ngrams.Types -import Gargantext.Core.NodeStory +import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm), NgramsRepoElement(_nre_root, _nre_list) ) +import Gargantext.Core.NodeStory.Types +import Gargantext.Core.Text.Ngrams (NgramsType) import Gargantext.Core.Types.Main ( ListType(..) ) import Gargantext.Database.Admin.Types.Node ( NodeId, ListId ) -import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Prelude diff --git a/src/Gargantext/API/Ngrams/Types.hs b/src/Gargantext/API/Ngrams/Types.hs index bf94f00de5abe25bf2c77fa58735e8d7a29749b7..1c77331ef5dbb04391b0cd01b6bb883445397f45 100644 --- a/src/Gargantext/API/Ngrams/Types.hs +++ b/src/Gargantext/API/Ngrams/Types.hs @@ -37,12 +37,12 @@ import Data.Validity ( Validity(..) ) import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField) import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField) import Gargantext.Core.Text (size) +import Gargantext.Core.Text.Ngrams qualified as Ngrams import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO) import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema) import Gargantext.Database.Admin.Types.Node (ContextId) import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM') -import Gargantext.Database.Schema.Ngrams qualified as TableNgrams import Gargantext.Prelude hiding (IsString, hash, from, replace, to) import Gargantext.Prelude.Crypto.Hash (IsHashable(..)) import Gargantext.Utils.Servant (CSV, ZIP) @@ -551,7 +551,7 @@ instance ToField NgramsTablePatch where toField = toJSONField -instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)) +instance FromField (PatchMap Ngrams.NgramsType (PatchMap NodeId NgramsTablePatch)) where fromField = fromField' @@ -747,21 +747,21 @@ type RepoCmdM env err m = -- Instances -instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) +instance FromHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap)) where parseUrlPiece x = maybeToEither x (decode $ cs x) -instance ToHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) where +instance ToHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap)) where toUrlPiece m = cs (encode m) -ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType +ngramsTypeFromTabType :: TabType -> Ngrams.NgramsType ngramsTypeFromTabType tabType = let here = "Garg.API.Ngrams: " :: Text in case tabType of - Sources -> TableNgrams.Sources - Authors -> TableNgrams.Authors - Institutes -> TableNgrams.Institutes - Terms -> TableNgrams.NgramsTerms + Sources -> Ngrams.Sources + Authors -> Ngrams.Authors + Institutes -> Ngrams.Institutes + Terms -> Ngrams.NgramsTerms _ -> panicTrace $ here <> "No Ngrams for this tab" -- TODO: This `panic` would disapear with custom NgramsType. @@ -784,7 +784,7 @@ instance ToSchema UpdateTableNgramsCharts where declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_utn_") ------------------------------------------------------------------------ -type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) +type NgramsList = (Map Ngrams.NgramsType (Versioned NgramsTableMap)) -- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name diff --git a/src/Gargantext/API/Node/Corpus/Export.hs b/src/Gargantext/API/Node/Corpus/Export.hs index 963e2a89fa4cd6146ef1e6338bd6a30ad71a97be..2f2ed5dba3780dfde80554299808bfce10c7dabe 100644 --- a/src/Gargantext/API/Node/Corpus/Export.hs +++ b/src/Gargantext/API/Node/Corpus/Export.hs @@ -27,7 +27,9 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) ) import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport import Gargantext.API.Prelude (GargNoServer) import Gargantext.Core.NodeStory.Types ( NodeListStory ) -import Gargantext.Core.Types +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Core.Types.Main ( ListType(MapTerm) ) +import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser) import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) @@ -37,7 +39,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Schema.Context (_context_id) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude hiding (hash) import Gargantext.Prelude.Crypto.Hash (hash) import Servant (Headers, Header, addHeader) diff --git a/src/Gargantext/API/Node/Corpus/Export/Types.hs b/src/Gargantext/API/Node/Corpus/Export/Types.hs index 17d411f86166e4712a680a1e2a56676f03c2c78b..52404178fad108f50fb3b91d31c274f12191fea6 100644 --- a/src/Gargantext/API/Node/Corpus/Export/Types.hs +++ b/src/Gargantext/API/Node/Corpus/Export/Types.hs @@ -14,13 +14,13 @@ Portability : POSIX module Gargantext.API.Node.Corpus.Export.Types where import Data.Aeson.TH (deriveJSON) -import Data.Swagger +import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..) ) import Data.Text (Text) import GHC.Generics (Generic) -import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport -import Gargantext.Core.Types +import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Core.Types ( ListId, TODO ) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Servant diff --git a/src/Gargantext/API/Node/Update.hs b/src/Gargantext/API/Node/Update.hs index 8e3055d834def3e9c13d805d297e8618d8f88834..df6b9f755cb649f156027615c766bcc4ecf6535b 100644 --- a/src/Gargantext/API/Node/Update.hs +++ b/src/Gargantext/API/Node/Update.hs @@ -9,9 +9,8 @@ Portability : POSIX -} -{-# LANGUAGE TemplateHaskell #-} -{-# LANGUAGE TypeOperators #-} {-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE TypeOperators #-} module Gargantext.API.Node.Update where @@ -19,16 +18,17 @@ module Gargantext.API.Node.Update import Control.Lens (view) import Data.Aeson import Data.Set qualified as Set -import Data.Swagger +import Data.Swagger ( ToSchema ) import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Types (HasSettings) -import Gargantext.API.Errors.Types +import Gargantext.API.Errors.Types ( BackendInternalError ) import Gargantext.API.Metrics qualified as Metrics import Gargantext.API.Ngrams.Types qualified as NgramsTypes import Gargantext.API.Prelude (GargM, simuLogs) import Gargantext.Core.Methods.Similarities (GraphMetric(..)) -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.Core.NodeStory.Types (HasNodeStory) +import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms)) import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Viz.Graph.API (recomputeGraph) import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..)) @@ -38,20 +38,19 @@ import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI) import Gargantext.Database.Action.Flow (reIndexWith) import Gargantext.Database.Action.Flow.Pairing (pairing) import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore) -import Gargantext.Database.Admin.Types.Hyperdata -import Gargantext.Database.Admin.Types.Node +import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) ) +import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire) ) import Gargantext.Database.Query.Table.Node (defaultList, getNode) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) -import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms)) import Gargantext.Database.Schema.Node (node_parent_id) import Gargantext.Prelude +import Gargantext.System.Logging ( MonadLogger ) import Gargantext.Utils.Aeson qualified as GUA import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) +import Gargantext.Utils.UTCTime (timeMeasured) import Servant import Test.QuickCheck (elements) -import Test.QuickCheck.Arbitrary -import Gargantext.Utils.UTCTime (timeMeasured) -import Gargantext.System.Logging +import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) ) ------------------------------------------------------------------------ type API = Summary " Update node according to NodeType params" diff --git a/src/Gargantext/Core/Flow/Types.hs b/src/Gargantext/Core/Flow/Types.hs index 7ac391f7a73a5c3e966a9f61f0b39a61a8ca81b0..ea92578dc336aea13dd9b45a2c44951dfaef0762 100644 --- a/src/Gargantext/Core/Flow/Types.hs +++ b/src/Gargantext/Core/Flow/Types.hs @@ -14,8 +14,8 @@ Portability : POSIX module Gargantext.Core.Flow.Types where -import Control.Lens -import Gargantext.Database.Admin.Types.Node +import Control.Lens ( Lens' ) +import Gargantext.Database.Admin.Types.Node ( Node ) import Gargantext.Database.Schema.Node (node_hash_id) import Gargantext.Prelude import Gargantext.Prelude.Crypto.Hash (Hash) diff --git a/src/Gargantext/Core/NodeStory.hs b/src/Gargantext/Core/NodeStory.hs index c8b4b05444c4e865087e3ed42b890f2be41a2100..5cf695063a99d17d4e5e749cb1b20863ceb76b7e 100644 --- a/src/Gargantext/Core/NodeStory.hs +++ b/src/Gargantext/Core/NodeStory.hs @@ -73,10 +73,10 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS import Gargantext.API.Ngrams.Types import Gargantext.Core.NodeStory.DB import Gargantext.Core.NodeStory.Types +import Gargantext.Core.Text.Ngrams qualified as Ngrams import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) ) import Gargantext.Database.Admin.Config () import Gargantext.Database.Prelude (HasConnectionPool(..)) -import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Prelude hiding (to) import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runPGSQuery ) @@ -84,7 +84,7 @@ import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runP getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList getNodeStory' c nId = do --res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement] - res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)] + res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)] -- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id). -- Need to create a map: {<node_id>: {<ngrams_type_id>: {<ngrams_id>: <data>}}} let dbData = map (\(version, ngramsType, ngrams, ngrams_repo_element) -> @@ -235,9 +235,9 @@ fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed ) ) <$> nsChildren --- | Sometimes, when we upload a new list, a child can be left without --- a parent. Find such ngrams and set their 'root' and 'parent' to --- 'Nothing'. +-- | (#281) Sometimes, when we upload a new list, a child can be left +-- without a parent. Find such ngrams and set their 'root' and +-- 'parent' to 'Nothing'. fixChildrenWithNoParent :: NgramsState' -> NgramsState' fixChildrenWithNoParent ns = archiveStateFromList $ nsParents <> nsChildrenFixed where @@ -341,13 +341,13 @@ fixNodeStoryVersions = do -- printDebug "[fixNodeStoryVersions] nIds" nIds mapM_ (\(PGS.Only nId) -> do -- printDebug "[fixNodeStoryVersions] nId" nId - updateVer c TableNgrams.Authors nId + updateVer c Ngrams.Authors nId - updateVer c TableNgrams.Institutes nId + updateVer c Ngrams.Institutes nId - updateVer c TableNgrams.Sources nId + updateVer c Ngrams.Sources nId - updateVer c TableNgrams.NgramsTerms nId + updateVer c Ngrams.NgramsTerms nId pure () ) nIds @@ -363,7 +363,7 @@ fixNodeStoryVersions = do SET version = ? WHERE node_id = ? AND ngrams_type_id = ? |] - updateVer :: PGS.Connection -> TableNgrams.NgramsType -> Int64 -> IO () + updateVer :: PGS.Connection -> Ngrams.NgramsType -> Int64 -> IO () updateVer c ngramsType nId = do maxVer <- runPGSQuery c maxVerQuery (nId, ngramsType) :: IO [PGS.Only (Maybe Int64)] case maxVer of diff --git a/src/Gargantext/Core/NodeStory/DB.hs b/src/Gargantext/Core/NodeStory/DB.hs index 0633f344339193e6806100b5c726b95840ac11a6..ba6d1ce3d4c09b63e4ddee0affd9ae92ae4e131d 100644 --- a/src/Gargantext/Core/NodeStory/DB.hs +++ b/src/Gargantext/Core/NodeStory/DB.hs @@ -12,7 +12,6 @@ Portability : POSIX {-# LANGUAGE Arrows #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE TemplateHaskell #-} module Gargantext.Core.NodeStory.DB ( nodeExists @@ -27,22 +26,20 @@ module Gargantext.Core.NodeStory.DB where import Control.Lens ((^.)) -import Control.Monad.Except import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.Map.Strict qualified as Map import Data.Map.Strict.Patch qualified as PM -import Data.Monoid import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Gargantext.API.Ngrams.Types import Gargantext.Core (toDBid) -import Gargantext.Core.NodeStory.Types -import Gargantext.Core.Types (NodeId(..), NodeType) +import Gargantext.Core.NodeStory.Types ( a_state, a_version, ArchiveList, ArchiveStateList, NgramsStatePatch' ) +import Gargantext.Core.Text.Ngrams (NgramsType) +import Gargantext.Database.Admin.Types.Node ( NodeId(..), NodeType ) import Gargantext.Database.Admin.Config () -import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams -import Gargantext.Database.Schema.Ngrams (NgramsType) +import Gargantext.Database.Schema.Ngrams () import Gargantext.Prelude hiding (to) import Gargantext.Prelude.Database @@ -70,7 +67,7 @@ getNodesArchiveHistory :: PGS.Connection -> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))] getNodesArchiveHistory c nodesId = do as <- runPGSQuery c query (PGS.Only $ Values fields nodesId) - :: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)] + :: IO [(Int, NgramsType, NgramsTerm, NgramsPatch)] pure $ map (\(nId, ngramsType, terms, patch) -> ( UnsafeMkNodeId nId @@ -96,11 +93,11 @@ insertNodeArchiveHistory _ _ _ [] = pure () insertNodeArchiveHistory c nodeId version (h:hs) = do let tuples = mconcat $ (\(nType, NgramsTablePatch patch) -> (\(term, p) -> - (nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)] + (nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, NgramsType, NgramsTerm, NgramsPatch)] tuplesM <- mapM (\(nId, nType, term, patch) -> do [PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int] pure (nId, nType, ngramsId, term, patch) - ) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)] + ) tuples :: IO [(NodeId, NgramsType, Int, NgramsTerm, NgramsPatch)] _ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM) _ <- insertNodeArchiveHistory c nodeId version hs pure () diff --git a/src/Gargantext/Core/NodeStory/Types.hs b/src/Gargantext/Core/NodeStory/Types.hs index 94bb5f80e29a3fa9537bf701da03ed7353d926f0..e3d1bd1afde01321e81a54f4654cc7ce8711b867 100644 --- a/src/Gargantext/Core/NodeStory/Types.hs +++ b/src/Gargantext/Core/NodeStory/Types.hs @@ -56,10 +56,10 @@ import Data.Set qualified as Set import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField) import Gargantext.API.Ngrams.Types import Gargantext.Database.Admin.Types.Node ( NodeId(..) ) +import Gargantext.Core.Text.Ngrams qualified as Ngrams import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Database.Admin.Config () import Gargantext.Database.Prelude (DbCmd') -import Gargantext.Database.Schema.Ngrams qualified as TableNgrams import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Prelude hiding (to) import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField) @@ -100,8 +100,8 @@ instance (Serialise s, Serialise p) => Serialise (Archive s p) type NodeListStory = NodeStory NgramsState' NgramsStatePatch' -- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement' -type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap -type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch +type NgramsState' = Map Ngrams.NgramsType NgramsTableMap +type NgramsStatePatch' = PatchMap Ngrams.NgramsType NgramsTablePatch -- instance Serialise NgramsStatePatch' instance FromField (Archive NgramsState' NgramsStatePatch') where @@ -167,7 +167,7 @@ initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive archive = Archive { _a_version = 0 , _a_state = ngramsTableMap , _a_history = [] } - ngramsTableMap = Map.singleton TableNgrams.NgramsTerms + ngramsTableMap = Map.singleton Ngrams.NgramsTerms $ Map.fromList [ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable @@ -231,8 +231,8 @@ class HasNodeArchiveStoryImmediateSaver env where -type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)] -type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm) +type ArchiveStateList = [(Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)] +type ArchiveStateSet = Set.Set (Ngrams.NgramsType, NgramsTerm) ------------------------------------------------------------------------ ------------------------------------------------------------------------ diff --git a/src/Gargantext/Core/Text/Corpus/Parsers.hs b/src/Gargantext/Core/Text/Corpus/Parsers.hs index 17ebb62e5806a642bc7e2391cc7e47187e9a663d..97215840a97715131cfb927a5f97f342a1acbbeb 100644 --- a/src/Gargantext/Core/Text/Corpus/Parsers.hs +++ b/src/Gargantext/Core/Text/Corpus/Parsers.hs @@ -46,11 +46,11 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex) import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich) import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) -import Gargantext.Database.Query.Table.Ngrams (NgramsType(..)) import Gargantext.Prelude hiding (show, undefined) import Gargantext.Utils.Zip qualified as UZip -import Protolude +import Protolude ( show ) import System.FilePath (takeExtension) ------------------------------------------------------------------------ diff --git a/src/Gargantext/Core/Text/List.hs b/src/Gargantext/Core/Text/List.hs index 55110d96e27d8638d546a3f4d8b62722bd9343aa..bb138378562c9849b0d38732f21382e89c9f6524 100644 --- a/src/Gargantext/Core/Text/List.hs +++ b/src/Gargantext/Core/Text/List.hs @@ -9,14 +9,13 @@ Portability : POSIX -} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} module Gargantext.Core.Text.List where -import Control.Lens hiding (both) -- ((^.), view, over, set, (_1), (_2)) +import Control.Lens ( view, over ) -- ((^.), view, over, set, (_1), (_2)) import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.HashSet (HashSet) @@ -27,26 +26,27 @@ import Data.Set qualified as Set import Data.Tuple.Extra (both) import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..)) import Gargantext.Core.NLP (HasNLPServer) -import Gargantext.Core.NodeStory +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.Text (size) -import Gargantext.Core.Text.List.Group +import Gargantext.Core.Text.List.Group ( toGroupedTree, setScoresWithMap ) import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.WithStem -import Gargantext.Core.Text.List.Social -import Gargantext.Core.Text.List.Social.Prelude +import Gargantext.Core.Text.List.Social ( FlowSocialListWith, flowSocialList ) +import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont(FlowCont), flc_scores ) import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms) -import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, ContextId) +import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..)) import Gargantext.Core.Types.Individu (User(..)) +import Gargantext.Core.Types.Main ( ListType(..) ) import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample) +import Gargantext.Database.Admin.Types.Node ( MasterCorpusId, UserCorpusId, ContextId ) import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Query.Table.Ngrams (text2ngrams) import Gargantext.Database.Query.Table.NgramsPostag (selectLems) import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Database.Query.Tree.Error (HasTreeError) -import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..)) +import Gargantext.Database.Schema.Ngrams (text2ngrams) import Gargantext.Prelude {- @@ -81,8 +81,8 @@ buildNgramsLists user uCid mCid mfslw gp = do pure $ Map.unions $ [ngTerms] <> othersTerms -data MapListSize = MapListSize { unMapListSize :: !Int } -data MaxListSize = MaxListSize { unMaxListSize :: !Int } +newtype MapListSize = MapListSize { unMapListSize :: Int } +newtype MaxListSize = MaxListSize { unMaxListSize :: Int } buildNgramsOthersList :: ( HasNodeError err , HasNLPServer env @@ -103,7 +103,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, <- flowSocialList mfslw user nt ( FlowCont HashMap.empty $ HashMap.fromList $ List.zip (HashMap.keys allTerms) - (List.cycle [mempty]) + (repeat mempty) ) let groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms @@ -113,29 +113,36 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, (mapTerms, tailTerms') = HashMap.partition ((== Just MapTerm) . viewListType) tailTerms - listSize = mapListSize - (List.length mapTerms) + listSize = mapListSize - List.length mapTerms (mapTerms', candiTerms) = both HashMap.fromList $ List.splitAt listSize $ List.take maxListSize $ List.sortOn (Down . viewScore . snd) $ HashMap.toList tailTerms' - - - pure $ Map.fromList [( nt, List.take maxListSize $ (toNgramsElement stopTerms) - <> (toNgramsElement mapTerms ) - <> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' ) - <> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms) + + pure $ Map.fromList [( nt, List.take maxListSize $ toNgramsElement stopTerms + <> toNgramsElement mapTerms + <> toNgramsElement (setListType (Just MapTerm ) mapTerms') + <> toNgramsElement (setListType (Just CandidateTerm) candiTerms) )] +-- | https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169#note_10049 +-- Stemming can be useful if you do not have any context: ok for full text search then. +-- +-- In document, we have context so we can add grammar and linguistics +-- rules to be more precise than the stemmatization, that is why the +-- lemmatization is used here to group. Basically it will avoid +-- grouping homonyms in list. In search usually you add more context +-- to "control" the stemmatization approximation. getGroupParams :: ( HasNodeError err , HasTreeError err ) => GroupParams -> HashSet Ngrams -> DBCmd err GroupParams -getGroupParams gp@(GroupWithPosTag l nsc _m) ng = do - !hashMap <- HashMap.fromList <$> selectLems l nsc (HashSet.toList ng) +getGroupParams gp@(GroupWithPosTag { .. }) ng = do + !hashMap <- HashMap.fromList <$> selectLems _gwl_lang _gwl_nlp_config (HashSet.toList ng) -- printDebug "hashMap" hashMap - pure $ over gwl_map (\x -> x <> hashMap) gp + pure $ over gwl_map (<> hashMap) gp getGroupParams gp _ = pure gp @@ -167,8 +174,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !(socialLists :: FlowCont NgramsTerm FlowListScores) <- flowSocialList mfslw user nt ( FlowCont HashMap.empty $ HashMap.fromList - $ List.zip (HashMap.keys allTerms) - (List.cycle [mempty]) + $ List.zip (HashMap.keys allTerms) + (repeat mempty) ) -- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt @@ -181,17 +188,17 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys) - -- printDebug "[buildNgramsTermsList: groupParams']" ("" :: Text) - let !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists !groupedWithList = toGroupedTree socialLists_Stemmed allTerms !(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType) - $ HashMap.filter (\g -> (view gts'_score g) > 1) + $ HashMap.filter (\g -> view gts'_score g > 1) $ view flc_scores groupedWithList !(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms + -- void $ panicTrace $ "groupedWithList: " <> show groupedWithList + -- printDebug "[buildNgramsTermsList] socialLists" socialLists -- printDebug "[buildNgramsTermsList] socialLists with scores" socialLists_Stemmed -- printDebug "[buildNgramsTermsList] groupedWithList" groupedWithList @@ -269,8 +276,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !(monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen -- filter with max score - partitionWithMaxScore = HashMap.partition (\g -> (view scored_genInc $ view gts'_score g) - > (view scored_speExc $ view gts'_score g) + partitionWithMaxScore = HashMap.partition (\g -> view scored_genInc (view gts'_score g) + > view scored_speExc (view gts'_score g) ) !(monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored @@ -285,25 +292,25 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !inclSize = 0.4 :: Double !exclSize = 1 - inclSize - splitAt'' max' n' = (both (HashMap.fromList)) . (List.splitAt (round $ n' * max')) - sortOn' f = (List.sortOn (Down . (view (gts'_score . f)) . snd)) . HashMap.toList + splitAt'' max' n' = both HashMap.fromList . List.splitAt (round $ n' * max') + sortOn' f = List.sortOn (Down . view (gts'_score . f) . snd) . HashMap.toList monoInc_size n = splitAt'' n $ monoSize * inclSize / 2 multExc_size n = splitAt'' n $ multSize * exclSize / 2 - !(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ (sortOn' scored_genInc) monoScoredIncl - !(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ (sortOn' scored_speExc) monoScoredExcl + !(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ sortOn' scored_genInc monoScoredIncl + !(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ sortOn' scored_speExc monoScoredExcl - !(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ (sortOn' scored_genInc) multScoredIncl - !(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ (sortOn' scored_speExc) multScoredExcl + !(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ sortOn' scored_genInc multScoredIncl + !(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ sortOn' scored_speExc multScoredExcl - !(canMonoScoredIncHead , _) = monoInc_size canSize $ (sortOn' scored_genInc) monoScoredInclTail - !(canMonoScoredExclHead, _) = monoInc_size canSize $ (sortOn' scored_speExc) monoScoredExclTail + !(canMonoScoredIncHead , _) = monoInc_size canSize $ sortOn' scored_genInc monoScoredInclTail + !(canMonoScoredExclHead, _) = monoInc_size canSize $ sortOn' scored_speExc monoScoredExclTail - !(canMulScoredInclHead, _) = multExc_size canSize $ (sortOn' scored_genInc) multScoredInclTail - !(canMultScoredExclHead, _) = multExc_size canSize $ (sortOn' scored_speExc) multScoredExclTail + !(canMulScoredInclHead, _) = multExc_size canSize $ sortOn' scored_genInc multScoredInclTail + !(canMultScoredExclHead, _) = multExc_size canSize $ sortOn' scored_speExc multScoredExclTail ------------------------------------------------------------ -- Final Step building the Typed list diff --git a/src/Gargantext/Core/Text/List/Group.hs b/src/Gargantext/Core/Text/List/Group.hs index f7eb1653eceacf100395c1d3673211612c9e62bb..53833218c1f5d3e5f2b838d710ffa7d7d3a9ee2f 100644 --- a/src/Gargantext/Core/Text/List/Group.hs +++ b/src/Gargantext/Core/Text/List/Group.hs @@ -9,11 +9,8 @@ Portability : POSIX -} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE FunctionalDependencies #-} -{-# LANGUAGE InstanceSigs #-} module Gargantext.Core.Text.List.Group where @@ -23,8 +20,8 @@ import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core.Text.List.Group.Prelude -import Gargantext.Core.Text.List.Group.WithScores -import Gargantext.Core.Text.List.Social.Prelude +import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores' ) +import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont ) import Gargantext.Prelude ------------------------------------------------------------------------ toGroupedTree :: (Ord a, Monoid a, HasSize a) @@ -43,9 +40,7 @@ setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b -> HashMap NgramsTerm (GroupedTreeScores b) setScoresWithMap m = setScoresWith (score m) where - score m' t = case HashMap.lookup t m' of - Nothing -> mempty - Just r -> r + score m' t = fromMaybe mempty (HashMap.lookup t m') setScoresWith :: (Ord a, Ord b) => (NgramsTerm -> b) @@ -58,8 +53,7 @@ setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f) ) -} setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k - , _gts'_children = setScoresWith f - $ view gts'_children v - } - ) + , _gts'_children = setScoresWith f $ view gts'_children v + } + ) ------------------------------------------------------------------------ diff --git a/src/Gargantext/Core/Text/List/Group/WithStem.hs b/src/Gargantext/Core/Text/List/Group/WithStem.hs index d0586f5744ec3fbead64b4d2304262faac3dc763..f3b39734fdab6cb7026cdb38b00b92ab800668f8 100644 --- a/src/Gargantext/Core/Text/List/Group/WithStem.hs +++ b/src/Gargantext/Core/Text/List/Group/WithStem.hs @@ -25,11 +25,11 @@ import Data.HashSet qualified as Set import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Text qualified as Text -import Gargantext.API.Ngrams.Types +import Gargantext.API.Ngrams.Types ( toNgramsPatch, NgramsPatch, NgramsTerm(..) ) import Gargantext.Core (Lang(..), Form, Lem, NLPServerConfig) -import Gargantext.Core.Text.List.Group.Prelude -import Gargantext.Core.Text.List.Social.Patch -import Gargantext.Core.Text.List.Social.Prelude +import Gargantext.Core.Text.List.Group.Prelude ( Stem ) +import Gargantext.Core.Text.List.Social.Patch ( addScorePatch ) +import Gargantext.Core.Text.List.Social.Prelude ( FlowCont, FlowListScores ) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) import Gargantext.Prelude @@ -43,8 +43,8 @@ addScoreStem groupParams ngrams fl = foldl' addScorePatch fl ------------------------------------------------------------------------ -- | Main Types -data StopSize = StopSize {unStopSize :: !Int} - deriving (Eq) +newtype StopSize = StopSize {unStopSize :: Int} + deriving (Eq, Show) -- | TODO: group with 2 terms only can be -- discussed. Main purpose of this is offering @@ -61,7 +61,7 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang , _gwl_nlp_config :: !NLPServerConfig , _gwl_map :: !(HashMap Form Lem) } - deriving (Eq) + deriving (Eq, Show) ------------------------------------------------------------------------ groupWith :: GroupParams @@ -80,7 +80,6 @@ groupWith (GroupParams { unGroupParams_lang = l }) t = $ Text.splitOn " " $ Text.replace "-" " " $ unNgramsTerm t - -- | This lemmatization group done with CoreNLP algo (or others) groupWith (GroupWithPosTag { _gwl_map = m }) t = case HashMap.lookup (unNgramsTerm t) m of diff --git a/src/Gargantext/Core/Text/List/Social.hs b/src/Gargantext/Core/Text/List/Social.hs index 5a9281992577baed55fc52101773a5b78a59a0d1..e10ddf66193d4d00e14da3f93185595f8b1fbe25 100644 --- a/src/Gargantext/Core/Text/List/Social.hs +++ b/src/Gargantext/Core/Text/List/Social.hs @@ -18,24 +18,23 @@ import Data.Aeson import Data.HashMap.Strict (HashMap) import Data.List qualified as List import Data.Map.Strict qualified as Map -import Data.Pool -import Data.Swagger +import Data.Pool ( withResource ) +import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, defaultSchemaOptions ) import Data.Text qualified as T import Data.Vector qualified as V -import GHC.Generics import Gargantext.API.Ngrams.Types (NgramsTerm, NgramsPatch) -import Gargantext.Core.NodeStory (getNodesArchiveHistory) +import Gargantext.Core.NodeStory.DB ( getNodesArchiveHistory ) import Gargantext.Core.Text.List.Social.Find (findListsId) import Gargantext.Core.Text.List.Social.Patch (addScorePatches) import Gargantext.Core.Text.List.Social.Prelude (FlowCont, FlowListScores) +import Gargantext.Core.Text.Ngrams (NgramsType) import Gargantext.Core.Types.Individu (User) import Gargantext.Database.Admin.Types.Node (ListId) import Gargantext.Database.Prelude (DBCmd, connPool) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Tree (NodeMode(Private), HasTreeError) -import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Prelude -import Test.QuickCheck +import Test.QuickCheck ( Arbitrary(arbitrary), oneof, arbitraryBoundedEnum ) import Web.Internal.HttpApiData (ToHttpApiData, FromHttpApiData, parseUrlPiece, toUrlPiece) ------------------------------------------------------------------------ ------------------------------------------------------------------------ diff --git a/src/Gargantext/Core/Text/List/Social/Patch.hs b/src/Gargantext/Core/Text/List/Social/Patch.hs index d6d588df751ecd21b33c9d91ab1b04ad2af68a96..a498c14784951ea5cf43bebf779da7d1722ee345 100644 --- a/src/Gargantext/Core/Text/List/Social/Patch.hs +++ b/src/Gargantext/Core/Text/List/Social/Patch.hs @@ -14,14 +14,13 @@ module Gargantext.Core.Text.List.Social.Patch import Control.Lens hiding (cons) import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap -import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Patch.Class qualified as Patch (Replace(..)) import Gargantext.API.Ngrams.Prelude (unMSet, patchMSet_toList) -import Gargantext.API.Ngrams.Types +import Gargantext.API.Ngrams.Types ( NgramsTerm, nre_children, nre_list, MSet, NgramsPatch(..) ) import Gargantext.Core.Text.List.Social.Prelude -import Gargantext.Core.Types (ListId) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Database.Admin.Types.Node ( ListId ) import Gargantext.Prelude addScorePatches :: NgramsType -> [ListId] @@ -40,7 +39,7 @@ addScorePatchesList :: NgramsType addScorePatchesList nt repo fl lid = foldl' addScorePatch fl patches where - patches = maybe [] (List.concat . (map HashMap.toList)) patches' + patches = maybe [] (concatMap HashMap.toList) patches' patches' = do lists <- Map.lookup lid repo diff --git a/src/Gargantext/Core/Text/Ngrams.hs b/src/Gargantext/Core/Text/Ngrams.hs new file mode 100644 index 0000000000000000000000000000000000000000..5b032b78877348a230df1d566161d75e52d88c34 --- /dev/null +++ b/src/Gargantext/Core/Text/Ngrams.hs @@ -0,0 +1,109 @@ +{-| +Module : Gargantext.Core.Text.Ngrams +Description : Main Ngrams types +Copyright : (c) CNRS, 2017-Present +License : AGPL + CECILL v3 +Maintainer : team@gargantext.org +Stability : experimental +Portability : POSIX +-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE FunctionalDependencies #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TemplateHaskell #-} + +module Gargantext.Core.Text.Ngrams + where + + +import Codec.Serialise (Serialise()) +import Control.Lens (over) +import Data.Aeson ( ToJSON(..), FromJSON(..), FromJSONKey(..), FromJSONKeyFunction(..), Value(String), ToJSONKey(..) ) +import Data.Aeson.Types (toJSONKeyText) +import Data.Text (pack) +import Database.PostgreSQL.Simple qualified as PGS +import Gargantext.Core.Types (TODO(..)) +import Gargantext.Database.Schema.Prelude hiding (over) +import Gargantext.Prelude +import Servant (FromHttpApiData(..), ToHttpApiData(..)) +import Test.QuickCheck (elements) +import Text.Read (read) + + +-- | Main Ngrams Types +-- | Typed Ngrams +-- Typed Ngrams localize the context of the ngrams +-- ngrams in source field of document has Sources Type +-- ngrams in authors field of document has Authors Type +-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract) +data NgramsType = Authors | Institutes | Sources | NgramsTerms + deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic) + +instance Serialise NgramsType +instance FromJSON NgramsType + where + parseJSON (String "Authors") = pure Authors + parseJSON (String "Institutes") = pure Institutes + parseJSON (String "Sources") = pure Sources + parseJSON (String "Terms") = pure NgramsTerms + parseJSON (String "NgramsTerms") = pure NgramsTerms + parseJSON _ = mzero + +instance FromJSONKey NgramsType where + fromJSONKey = FromJSONKeyTextParser (parseJSON . String) + +instance ToJSON NgramsType + where + toJSON Authors = String "Authors" + toJSON Institutes = String "Institutes" + toJSON Sources = String "Sources" + toJSON NgramsTerms = String "Terms" + +instance ToJSONKey NgramsType where + toJSONKey = toJSONKeyText (pack . show) +instance FromHttpApiData NgramsType where + parseUrlPiece n = pure $ (read . cs) n +instance ToHttpApiData NgramsType where + toUrlPiece = pack . show +instance ToParamSchema NgramsType where + toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) +instance Arbitrary NgramsType where + arbitrary = elements [ minBound .. maxBound ] + + +ngramsTypes :: [NgramsType] +ngramsTypes = [minBound..] + +instance ToSchema NgramsType +{- where + declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_") +--} + + + +data Ngrams = UnsafeNgrams { _ngramsTerms :: Text + , _ngramsSize :: Int + } + deriving (Generic, Show, Eq, Ord) + +instance Hashable Ngrams + +makeLenses ''Ngrams +instance PGS.ToRow Ngrams where + toRow (UnsafeNgrams t s) = [toField t, toField s] + +------------------------------------------------------------------------ +------------------------------------------------------------------------- +-- Named entity are typed ngrams of Terms Ngrams +data NgramsT a = + NgramsT { _ngramsType :: NgramsType + , _ngramsT :: a + } deriving (Generic, Show, Eq, Ord) + +makeLenses ''NgramsT + +instance Functor NgramsT where + fmap = over ngramsT + diff --git a/src/Gargantext/Core/Text/Terms.hs b/src/Gargantext/Core/Text/Terms.hs index 68526aad6a552758d2c2ea1ae9e1219a5913e21c..2064b99cf1a1e122e9874bb40c26056a62fe998a 100644 --- a/src/Gargantext/Core/Text/Terms.hs +++ b/src/Gargantext/Core/Text/Terms.hs @@ -37,27 +37,27 @@ compute graph module Gargantext.Core.Text.Terms where -import Control.Lens +import Control.Lens ( (^.), view, over, makeLenses ) import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Data.Set qualified as Set import Data.Text qualified as Text -import Data.Traversable import GHC.Base (String) -import Gargantext.Core +import Gargantext.Core ( Lang, NLPServerConfig, PosTagAlgo ) import Gargantext.Core.Text (sentences, HasText(..)) +import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms) import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken) import Gargantext.Core.Text.Terms.Mono (monoTerms) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize) import Gargantext.Core.Text.Terms.Multi (multiterms) -import Gargantext.Core.Types +import Gargantext.Core.Types ( TermsCount, POS, Terms(Terms), TermsWithCount ) import Gargantext.Core.Utils (groupWithCounts) import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Query.Table.Ngrams (insertNgrams) import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag, np_form, np_lem) -import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams, NgramsId) +import Gargantext.Database.Schema.Ngrams (text2ngrams, NgramsId) import Gargantext.Prelude data TermType lang @@ -103,8 +103,7 @@ withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. } $ fmap toToken $ uniText $ Text.intercalate " . " - $ List.concat - $ map hasText ns + $ concatMap hasText ns just_m -> just_m withLang l _ = l @@ -126,7 +125,11 @@ class ExtractNgramsT h ------------------------------------------------------------------------ enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag enrichedTerms l pa po (Terms ng1 ng2) = - NgramsPostag l pa po form lem + NgramsPostag { _np_lang = l + , _np_algo = pa + , _np_postag = po + , _np_form = form + , _np_lem = lem } where form = text2ngrams $ Text.intercalate " " ng1 lem = text2ngrams $ Text.intercalate " " $ Set.toList ng2 @@ -138,7 +141,7 @@ cleanNgrams s ng | otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms)) cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams -cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ (cleanNgrams s) ng +cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ cleanNgrams s ng cleanExtractedNgrams s (EnrichedNgrams ng) = EnrichedNgrams $ over np_form (cleanNgrams s) $ over np_lem (cleanNgrams s) ng @@ -156,8 +159,7 @@ insertExtractedNgrams ngs = do m2 <- insertNgramsPostag (map unEnrichedNgrams e) --printDebug "terms" m2 - let result = HashMap.union m1 m2 - pure result + pure $ HashMap.union m1 m2 isSimpleNgrams :: ExtractedNgrams -> Bool isSimpleNgrams (SimpleNgrams _) = True @@ -189,10 +191,10 @@ type MinNgramSize = Int termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount] termsUnsupervised (Unsupervised { _tt_model = Nothing }) = panicTrace "[termsUnsupervised] no model" termsUnsupervised (Unsupervised { _tt_model = Just _tt_model, .. }) = - map (\(t, cnt) -> (text2term _tt_lang t, cnt)) + map (first (text2term _tt_lang)) . groupWithCounts -- . List.nub - . (List.filter (\l' -> List.length l' >= _tt_windowSize)) + . List.filter (\l' -> List.length l' >= _tt_windowSize) . List.concat . mainEleveWith _tt_model _tt_ngramsSize . uniText @@ -200,19 +202,18 @@ termsUnsupervised _ = undefined newTries :: Int -> Text -> Tries Token () -newTries n t = buildTries n (fmap toToken $ uniText t) +newTries n t = buildTries n (toToken <$> uniText t) -- | TODO removing long terms > 24 uniText :: Text -> [[Text]] -uniText = map (List.filter (not . isPunctuation)) - . map tokenize - . sentences -- TODO get sentences according to lang - . Text.toLower +uniText = map (List.filter (not . isPunctuation) . tokenize) + . sentences -- TODO get sentences according to lang + . Text.toLower text2term :: Lang -> [Text] -> Terms text2term _ [] = Terms [] Set.empty text2term lang txt = Terms txt (Set.fromList $ map (stem lang PorterAlgorithm) txt) isPunctuation :: Text -> Bool -isPunctuation x = List.elem x $ (Text.pack . pure) +isPunctuation x = List.elem x $ Text.pack . pure <$> ("!?(),;.:" :: String) diff --git a/src/Gargantext/Core/Text/Terms/Multi.hs b/src/Gargantext/Core/Text/Terms/Multi.hs index ad22aa3bba0f2754dcc5a6197d7c6400006df3b6..4d70ea8d7ba3de263f65dc5f6fb6b7f4fb8040fa 100644 --- a/src/Gargantext/Core/Text/Terms/Multi.hs +++ b/src/Gargantext/Core/Text/Terms/Multi.hs @@ -11,25 +11,21 @@ Multi-terms are ngrams where n > 1. -} -{-# LANGUAGE OverloadedStrings #-} - module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake, tokenTagsWith, tokenTags, cleanTextForNLP) where -import Control.Applicative -import Data.Attoparsec.Text as DAT -import Data.Text hiding (map, group, filter, concat) +import Data.Attoparsec.Text as DAT ( digit, space, notChar, string ) import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo(..)) import Gargantext.Core.Text.Terms.Multi.Lang.En qualified as En import Gargantext.Core.Text.Terms.Multi.Lang.Fr qualified as Fr -import Gargantext.Core.Text.Terms.Multi.PosTagging -import Gargantext.Core.Text.Terms.Multi.PosTagging.Types +import Gargantext.Core.Text.Terms.Multi.PosTagging ( corenlp, tokens2tokensTags ) +import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(_sentences), Sentence(_sentenceTokens) ) import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake) -import Gargantext.Core.Types +import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(TokenTag, _my_token_pos) ) import Gargantext.Core.Utils (groupWithCounts) import Gargantext.Prelude import Gargantext.Utils.SpacyNLP qualified as SpacyNLP -import Replace.Attoparsec.Text as RAT +import Replace.Attoparsec.Text as RAT ( streamEdit ) ------------------------------------------------------------------- type NLP_API = Lang -> Text -> IO PosSentences diff --git a/src/Gargantext/Core/Text/Terms/Multi/Group.hs b/src/Gargantext/Core/Text/Terms/Multi/Group.hs index 3c8448cbb31c7c0f8a83425523958258d919ba60..bcdfd867c2fb0a7544d350f09dae4cdb46239c56 100644 --- a/src/Gargantext/Core/Text/Terms/Multi/Group.hs +++ b/src/Gargantext/Core/Text/Terms/Multi/Group.hs @@ -16,7 +16,7 @@ group the tokens into extracted terms. module Gargantext.Core.Text.Terms.Multi.Group (group2) where -import Gargantext.Core.Types +import Gargantext.Core.Types ( POS, TokenTag(TokenTag) ) import Gargantext.Prelude -- | FIXME p1 and p2 not really taken into account diff --git a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs index f0f29c5f54e96bdf5dc9f08e07776f3ea1ae0b1a..04817e54c0bf7071f287e8f5039a6779408ae98f 100644 --- a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs +++ b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs @@ -17,8 +17,8 @@ module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens) where import Gargantext.Prelude -import Gargantext.Core.Types -import Gargantext.Core.Text.Terms.Multi.Group +import Gargantext.Core.Types ( POS(CC, IN, DT, NP, JJ), TokenTag ) +import Gargantext.Core.Text.Terms.Multi.Group ( group2 ) ------------------------------------------------------------------------ -- | Rule grammar to group tokens @@ -31,8 +31,7 @@ groupTokens ntags = group2 NP NP -- $ group2 VB NP $ group2 JJ NP $ group2 JJ JJ - $ group2 JJ CC - $ ntags + $ group2 JJ CC ntags ------------------------------------------------------------------------ --groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs) diff --git a/src/Gargantext/Core/Viz/Chart.hs b/src/Gargantext/Core/Viz/Chart.hs index ba88b348c4c415a8a7c11291ff9a8819a517a209..7559feb6a4663ff93b1d819ce3c244f0cf38c05e 100644 --- a/src/Gargantext/Core/Viz/Chart.hs +++ b/src/Gargantext/Core/Viz/Chart.hs @@ -9,32 +9,31 @@ Portability : POSIX -} -{-# LANGUAGE TemplateHaskell #-} - module Gargantext.Core.Viz.Chart where import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Data.Map.Strict (toList) +import Data.Set qualified as Set import Data.Vector qualified as V -import Gargantext.API.Ngrams.NgramsTree -import Gargantext.API.Ngrams.Tools -import Gargantext.API.Ngrams.Types -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree ) +import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot ) +import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) ) +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.Text.Metrics.Count (occurrencesWith) -import Gargantext.Core.Types -import Gargantext.Core.Viz.Types -import Gargantext.Database.Action.Metrics.NgramsByContext -import Gargantext.Database.Admin.Config +import Gargantext.Core.Text.Ngrams (NgramsType) +import Gargantext.Core.Types.Main ( ListType ) +import Gargantext.Database.Admin.Types.Node ( NodeType(NodeList), CorpusId, contextId2NodeId ) +import Gargantext.Core.Viz.Types ( Histo(Histo) ) +import Gargantext.Database.Action.Metrics.NgramsByContext ( countContextsByNgramsWith, getContextsByNgramsOnlyUser ) +import Gargantext.Database.Admin.Config ( userMaster ) import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Query.Table.Node -import Gargantext.Database.Query.Table.Node.Select +import Gargantext.Database.Query.Table.Node ( getListsWithParentId ) +import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername ) import Gargantext.Database.Query.Table.NodeContext (selectDocsDates) -import Gargantext.Database.Schema.Ngrams -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Node ( NodePoly(_node_id) ) import Gargantext.Prelude hiding (toList) -import qualified Data.Set as Set histoData :: CorpusId -> DBCmd err Histo diff --git a/src/Gargantext/Core/Viz/Graph.hs b/src/Gargantext/Core/Viz/Graph.hs index 8f045b021efde73e63794b2cc0a1d63f030f0306..d0335090897ce1165d29714ee14d981bb85aeb52 100644 --- a/src/Gargantext/Core/Viz/Graph.hs +++ b/src/Gargantext/Core/Viz/Graph.hs @@ -9,8 +9,6 @@ Portability : POSIX -} -{-# LANGUAGE TemplateHaskell #-} - module Gargantext.Core.Viz.Graph where @@ -19,9 +17,9 @@ import Data.ByteString.Lazy as DBL (readFile, writeFile) import Data.HashMap.Strict (HashMap, lookup) import Data.Text qualified as Text import Gargantext.API.Ngrams.Types (NgramsTerm(..), NgramsRepoElement(..), mSetToList) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Viz.Graph.Types import Gargantext.Database.Admin.Types.Hyperdata.Prelude -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude import Text.Read qualified as Text diff --git a/src/Gargantext/Core/Viz/Graph/API.hs b/src/Gargantext/Core/Viz/Graph/API.hs index 9729109d2fe9eeaaf588db3be98a0c6bd146af90..241dd0cf1dd5b391b931ffd85f32c098accbf784 100644 --- a/src/Gargantext/Core/Viz/Graph/API.hs +++ b/src/Gargantext/Core/Viz/Graph/API.hs @@ -19,31 +19,31 @@ module Gargantext.Core.Viz.Graph.API where import Control.Lens (set, (^.), _Just, (^?), at) -import Data.Aeson +import Data.Aeson ( ToJSON, FromJSON ) import Data.HashMap.Strict qualified as HashMap -import Data.Swagger +import Data.Swagger ( ToSchema ) import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) -import Gargantext.API.Admin.Orchestrator.Types -import Gargantext.API.Errors.Types +import Gargantext.API.Admin.Orchestrator.Types ( JobLog ) +import Gargantext.API.Errors.Types ( BackendInternalError ) import Gargantext.API.Ngrams.Tools -import Gargantext.API.Prelude +import Gargantext.API.Prelude (GargM, GargServer) import Gargantext.Core.Methods.Similarities (Similarity(..), GraphMetric(..), withMetric) -import Gargantext.Core.NodeStory -import Gargantext.Core.Types.Main +import Gargantext.Core.NodeStory.Types ( HasNodeStory, a_version, unNodeStory, NodeListStory ) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Core.Types.Main ( ListType(MapTerm) ) import Gargantext.Core.Viz.Graph.GEXF () import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph) import Gargantext.Core.Viz.Graph.Types import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser) import Gargantext.Database.Action.Node (mkNodeWithParent) -import Gargantext.Database.Admin.Config +import Gargantext.Database.Admin.Config ( userMaster ) import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Query.Table.Node +import Gargantext.Database.Query.Table.Node ( getOrMkList, getNodeWith, defaultList, getClosestParentIdByType ) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) -import Gargantext.Database.Query.Table.Node.Select +import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername ) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) -import Gargantext.Database.Schema.Ngrams -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Node (node_hyperdata, node_name) import Gargantext.Prelude import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Servant diff --git a/src/Gargantext/Core/Viz/Graph/Tools.hs b/src/Gargantext/Core/Viz/Graph/Tools.hs index 9991b65f09b3a01555dbc780814838cebf84b38a..b6877a67c1d1169dca532eefc9c30e9856bdfa62 100644 --- a/src/Gargantext/Core/Viz/Graph/Tools.hs +++ b/src/Gargantext/Core/Viz/Graph/Tools.hs @@ -11,39 +11,40 @@ Portability : POSIX {-# OPTIONS_GHC -fno-warn-deprecations #-} -{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} module Gargantext.Core.Viz.Graph.Tools where -import Data.Aeson +import Data.Aeson ( ToJSON, FromJSON ) import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.HashSet qualified as HashSet import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Set qualified as Set -import Data.Swagger hiding (items) +import Data.Swagger ( ToSchema ) import Data.Text qualified as Text import Data.Vector.Storable qualified as Vec import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core.Methods.Similarities (Similarity(..), measure) -import Gargantext.Core.Statistics +import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) ) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..)) import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass') import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap) import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..)) import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude import Graph.BAC.ProxemyOptim qualified as BAC import Graph.Types (ClusterNode) import IGraph qualified as Igraph import IGraph.Algorithms.Layout qualified as Layout -import IGraph.Random -- (Gen(..)) +import IGraph.Random ( Gen ) -- (Gen(..)) import Test.QuickCheck (elements) -import Test.QuickCheck.Arbitrary +import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) ) data PartitionMethod = Spinglass | Confluence | Infomap deriving (Generic, Eq, Ord, Enum, Bounded, Show) diff --git a/src/Gargantext/Core/Viz/Graph/Types.hs b/src/Gargantext/Core/Viz/Graph/Types.hs index 03ead0ff675bd40dfc2cdfdc126203ce1539234a..9b7ae84707cd6e4013d8bd8576512ebc6ba702b5 100644 --- a/src/Gargantext/Core/Viz/Graph/Types.hs +++ b/src/Gargantext/Core/Viz/Graph/Types.hs @@ -24,12 +24,11 @@ import Data.Text (pack) import Database.PostgreSQL.Simple.FromField (FromField(..)) import Gargantext.API.Ngrams.Types (NgramsTerm) import Gargantext.Core.Methods.Similarities (GraphMetric) -import Gargantext.Core.Types (ListId) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Database.Admin.Types.Hyperdata.Prelude (Hyperdata) -import Gargantext.Database.Admin.Types.Node (NodeId) +import Gargantext.Database.Admin.Types.Node (ListId, NodeId) import Gargantext.Database.Prelude (fromField') -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField) import Test.QuickCheck (elements) diff --git a/src/Gargantext/Core/Viz/Phylo/API/Tools.hs b/src/Gargantext/Core/Viz/Phylo/API/Tools.hs index d2a5e5bb9914b7a233ae8d9f932e2195ca6145cf..c6989d29242397ad0f01d543a32e176842034a07 100644 --- a/src/Gargantext/Core/Viz/Phylo/API/Tools.hs +++ b/src/Gargantext/Core/Viz/Phylo/API/Tools.hs @@ -11,18 +11,17 @@ Portability : POSIX {-# OPTIONS_GHC -fno-warn-deprecations #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} module Gargantext.Core.Viz.Phylo.API.Tools where -import Control.Lens hiding (Context) +import Control.Lens (to, view) import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict') import Data.ByteString.Lazy qualified as Lazy import Data.Map.Strict qualified as Map -import Data.Proxy import Data.Set qualified as Set import Data.Text (pack) import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian) @@ -30,31 +29,31 @@ import Data.Time.Clock.POSIX(posixSecondsToUTCTime) import Gargantext.API.Ngrams.Prelude (getTermList) import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core (withDefaultLanguage, Lang) -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.Core.NodeStory.Types (HasNodeStory) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText) -import Gargantext.Core.Types (Context, nodeId2ContextId) import Gargantext.Core.Types.Main (ListType(MapTerm)) import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink) import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..), HyperdataCorpus(..)) +import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) -import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId) +import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..) ) +import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ContextId, PhyloId, nodeId2ContextId) import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) -import Gargantext.Database.Schema.Context -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _context_id) ) +import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata ) import Gargantext.Prelude hiding (to) -import Gargantext.System.Logging +import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM ) +import Gargantext.Utils.UTCTime (timeMeasured) import Prelude qualified import System.FilePath ((</>)) import System.IO.Temp (withTempDirectory) import System.Process qualified as Shell -import Gargantext.Utils.UTCTime (timeMeasured) -------------------------------------------------------------------- getPhyloData :: HasNodeError err diff --git a/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs b/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs index 1644a853dea8b114c0f954091da514a0dd4363d9..2e2a59b4d479762d3a06c7d9e191197a39c321c1 100644 --- a/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs +++ b/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs @@ -13,30 +13,30 @@ Portability : POSIX {-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ViewPatterns #-} module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain where -import Control.Lens hiding (Level) +import Control.Lens (to, view) import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List -import Data.Proxy import Data.Set qualified as Set import Data.Text qualified as Text import Gargantext.API.Ngrams.Tools (getTermsWith) -import Gargantext.API.Ngrams.Types +import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core (HasDBid, withDefaultLanguage) -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.Core.NodeStory.Types (HasNodeStory) import Gargantext.Core.Text.Context (TermList) -import Gargantext.Core.Text.Terms.WithList -import Gargantext.Core.Types +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Core.Text.Terms.WithList ( buildPatterns, termsInText, Patterns ) +import Gargantext.Core.Types.Main ( ListType(MapTerm) ) +import Gargantext.Database.Admin.Types.Node ( NodeType, CorpusId ) import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot) -import Gargantext.Database.Admin.Types.Hyperdata +import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(_hc_lang) ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(_hd_abstract, _hd_publication_year) ) import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith) import Gargantext.Database.Query.Table.NodeContext (selectDocs) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Node ( node_hyperdata ) import Gargantext.Prelude hiding (to) type MinSizeBranch = Int diff --git a/src/Gargantext/Database/Action/Flow.hs b/src/Gargantext/Database/Action/Flow.hs index 2c767f1dfbfe955527513588e49e435ed7deefe2..03b8eee79edc5a0c66d48298cb54b68bf279c2ad 100644 --- a/src/Gargantext/Database/Action/Flow.hs +++ b/src/Gargantext/Database/Action/Flow.hs @@ -51,7 +51,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) where import Conduit -import Control.Lens hiding (elements, Indexed) +import Control.Lens ( (^.), to, view, over ) import Data.Bifunctor qualified as B import Data.Conduit qualified as C import Data.Conduit.Internal (zipSources) @@ -60,56 +60,57 @@ import Data.Conduit.List qualified as CList import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Data.Map.Strict qualified as Map -import Data.Proxy import Data.Set qualified as Set import Data.Text qualified as T import EPO.API.Client.Types qualified as EPO import Gargantext.API.Ngrams.Tools (getTermsWith) -import Gargantext.Core (Lang(..), NLPServerConfig) -import Gargantext.Core (withDefaultLanguage) +import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms), Ngrams(_ngramsTerms)) +import Gargantext.Core (Lang(..), NLPServerConfig, withDefaultLanguage) import Gargantext.Core.Ext.IMTUser (readFile_Annuaire) import Gargantext.Core.NLP (HasNLPServer, nlpServerGet) -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.Core.NodeStory.Types (HasNodeStory) import Gargantext.Core.Text.Corpus.API qualified as API import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType) import Gargantext.Core.Text.List (buildNgramsLists) -import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..)) +import Gargantext.Core.Text.List.Group.WithStem (GroupParams(..)) import Gargantext.Core.Text.List.Social (FlowSocialListWith(..)) import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) import Gargantext.Core.Types (HasValidationError, TermsCount) import Gargantext.Core.Types.Individu (User(..)) -import Gargantext.Core.Types.Main +import Gargantext.Core.Types.Main ( CorpusName, ListType(MapTerm) ) import Gargantext.Core.Types.Query (Limit) import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances -import Gargantext.Database.Action.Flow.List +import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' ) +import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus ) import Gargantext.Database.Action.Flow.Utils (docNgrams, documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams) -import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore) import Gargantext.Database.Action.Search (searchDocInDatabase) import Gargantext.Database.Admin.Config (userMaster, corpusMasterName) -import Gargantext.Database.Admin.Types.Hyperdata +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact ) +import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument) ) import Gargantext.Database.Admin.Types.Node hiding (DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Prelude (DbCmd', DBCmd, hasConfig) -import Gargantext.Database.Query.Table.ContextNodeNgrams2 -import Gargantext.Database.Query.Table.Ngrams -import Gargantext.Database.Query.Table.Node +import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 ) +import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith ) import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) -import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) +import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..)) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId) import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus) +import Gargantext.Database.Schema.Ngrams ( indexNgrams, text2ngrams ) import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Prelude hiding (to) import Gargantext.Prelude.Config (GargConfig(..)) -import Gargantext.System.Logging -import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..)) +import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger ) +import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) ) import PUBMED.Types qualified as PUBMED ------------------------------------------------------------------------ -- Imports for upgrade function -import Gargantext.Database.Query.Tree (HasTreeError) +import Gargantext.Database.Query.Tree.Error ( HasTreeError ) ------------------------------------------------------------------------ @@ -182,11 +183,11 @@ flowDataText u (DataOld ids) tt cid mfslw _ = do _ <- Doc.add userCorpusId (map nodeId2ContextId ids) flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw where - corpusType = (Nothing :: Maybe HyperdataCorpus) + corpusType = Nothing :: Maybe HyperdataCorpus flowDataText u (DataNew (mLen, txtC)) tt cid mfslw jobHandle = do $(logLocM) DEBUG $ T.pack $ "Found " <> show mLen <> " new documents to process" for_ (mLen <&> fromInteger) (`addMoreSteps` jobHandle) - flowCorpus u (Right [cid]) tt mfslw (fromMaybe 0 mLen, (transPipe liftBase txtC)) jobHandle + flowCorpus u (Right [cid]) tt mfslw (fromMaybe 0 mLen, transPipe liftBase txtC) jobHandle ------------------------------------------------------------------------ -- TODO use proxy @@ -199,13 +200,13 @@ flowAnnuaire :: ( DbCmd' env err m , MonadJobStatus m ) => User -> Either CorpusName [CorpusId] - -> (TermType Lang) + -> TermType Lang -> FilePath -> JobHandle m -> m AnnuaireId flowAnnuaire u n l filePath jobHandle = do -- TODO Conduit for file - docs <- liftBase $ ((readFile_Annuaire filePath) :: IO [HyperdataContact]) + docs <- liftBase $ (readFile_Annuaire filePath :: IO [HyperdataContact]) flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing (fromIntegral $ length docs, yieldMany docs) jobHandle ------------------------------------------------------------------------ @@ -362,10 +363,11 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do _ <- reIndexWith userCorpusId listId NgramsTerms (Set.singleton MapTerm) _ <- updateContextScore userCorpusId listId _ <- updateNgramsOccurrences userCorpusId listId - + pure userCorpusId +-- | This function is responsible for contructing terms. buildSocialList :: ( HasNodeError err , HasValidationError err , HasNLPServer env @@ -389,8 +391,12 @@ buildSocialList l user userCorpusId listId ctype mfslw = do nlpServer <- view (nlpServerGet l) --let gp = (GroupParams l 2 3 (StopSize 3)) -- Here the PosTagAlgo should be chosen according to the Lang - ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw - $ GroupWithPosTag l nlpServer HashMap.empty + -- let gp = GroupParams { unGroupParams_lang = l + -- , unGroupParams_len = 10 + -- , unGroupParams_limit = 10 + -- , unGroupParams_stopSize = StopSize 10 } + let gp = GroupWithPosTag l nlpServer HashMap.empty + ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp -- printDebug "flowCorpusUser:ngs" ngs @@ -425,7 +431,7 @@ insertMasterDocs ncs c lang hs = do (extractNgramsT ncs $ withLang lang documentsWithId) (map (B.first contextId2NodeId) documentsWithId) - lId <- getOrMkList masterCorpusId masterUserId + lId <- getOrMkList masterCorpusId masterUserId -- _ <- saveDocNgramsWith lId mapNgramsDocs' _ <- saveDocNgramsWith lId mapNgramsDocs' @@ -445,13 +451,13 @@ saveDocNgramsWith lId mapNgramsDocs' = do -- new mapCgramsId <- listInsertDb lId toNodeNgramsW' - $ map (first _ngramsTerms . second Map.keys) + $ map (bimap _ngramsTerms Map.keys) $ HashMap.toList mapNgramsDocs --printDebug "saveDocNgramsWith" mapCgramsId -- insertDocNgrams - let ngrams2insert = catMaybes [ ContextNodeNgrams2 <$> Just (nodeId2ContextId nId) - <*> (getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')) + let ngrams2insert = catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId) + <$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'') <*> Just (fromIntegral w :: Double) | (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes @@ -498,5 +504,5 @@ reIndexWith cId lId nt lts = do $ map (docNgrams corpusLang nt ts) docs -- Saving the indexation in database - _ <- mapM (saveDocNgramsWith lId) ngramsByDoc + mapM_ (saveDocNgramsWith lId) ngramsByDoc pure () diff --git a/src/Gargantext/Database/Action/Flow/Extract.hs b/src/Gargantext/Database/Action/Flow/Extract.hs index 2b03eb100c13003d98d057144f5e1352b246a674..e993c9cad9624c4681965967225f2ab6d2df82b5 100644 --- a/src/Gargantext/Database/Action/Flow/Extract.hs +++ b/src/Gargantext/Database/Action/Flow/Extract.hs @@ -20,15 +20,17 @@ module Gargantext.Database.Action.Flow.Extract import Control.Lens ((^.), _Just, view) import Data.HashMap.Strict qualified as HashMap import Data.Map.Strict qualified as DM -import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP)) +import Gargantext.Core (Lang, NLPServerConfig(server)) import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text.Corpus.Parsers (splitOn) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang) import Gargantext.Core.Types (POS(NP), TermsCount) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact, HyperdataDocument, cw_lastName, hc_who, hd_authors, hd_bdd, hd_institutes, hd_source) -import Gargantext.Database.Admin.Types.Node +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source ) +import Gargantext.Database.Admin.Types.Node ( Node ) import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Schema.Ngrams +import Gargantext.Database.Schema.Ngrams ( text2ngrams ) import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Prelude @@ -49,6 +51,9 @@ instance ExtractNgramsT HyperdataContact pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] +-- | Main ngrams extraction functionality. +-- For NgramsTerms, this calls NLP server under the hood. +-- For Sources, Institutes, Authors, this uses simple split on " ". instance ExtractNgramsT HyperdataDocument where extractNgramsT :: NLPServerConfig @@ -72,9 +77,8 @@ instance ExtractNgramsT HyperdataDocument $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd)) $ doc ^. hd_authors - termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt)) - <$> concat - <$> liftBase (extractTerms ncs lang $ hasText doc) + termsWithCounts' <- map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$> + liftBase (extractTerms ncs lang $ hasText doc) pure $ HashMap.fromList $ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ] diff --git a/src/Gargantext/Database/Action/Flow/List.hs b/src/Gargantext/Database/Action/Flow/List.hs index 7abbbf8a1c93f0f9d62ebb5a6984f428c22670a0..028abec206f5a717e9f2401392928fe72b4913ea 100644 --- a/src/Gargantext/Database/Action/Flow/List.hs +++ b/src/Gargantext/Database/Action/Flow/List.hs @@ -11,8 +11,6 @@ Portability : POSIX {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstrainedClassMethods #-} -{-# LANGUAGE ConstraintKinds #-} -{-# LANGUAGE InstanceSigs #-} module Gargantext.Database.Action.Flow.List where @@ -27,12 +25,11 @@ import Gargantext.API.Ngrams (saveNodeStory) import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.API.Ngrams.Types import Gargantext.Core.NodeStory (HasNodeStory, a_history, a_state, a_version) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Types (HasValidationError(..), assertValid) import Gargantext.Core.Types.Main (ListType(CandidateTerm)) import Gargantext.Database.Admin.Types.Node -import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb,{- getCgramsId -}) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude hiding (toList) -- FLOW LIST @@ -169,7 +166,7 @@ listInsert lId ngs = mapM_ (\(typeList, ngElmts) -- the repo, they will be ignored. putListNgrams :: (HasValidationError err, HasNodeStory env err m) => NodeId - -> TableNgrams.NgramsType + -> NgramsType -> [NgramsElement] -> m () putListNgrams _ _ [] = pure () @@ -179,7 +176,7 @@ putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m putListNgrams' :: (HasValidationError err, HasNodeStory env err m) => NodeId - -> TableNgrams.NgramsType + -> NgramsType -> Map NgramsTerm NgramsRepoElement -> m () putListNgrams' listId ngramsType' ns = do diff --git a/src/Gargantext/Database/Action/Flow/Pairing.hs b/src/Gargantext/Database/Action/Flow/Pairing.hs index f113637308c65ccb9879d0453cdc6f6efa3eb0d7..a02ebc78ae504a3ac3243df49e8ce53a143f554e 100644 --- a/src/Gargantext/Database/Action/Flow/Pairing.hs +++ b/src/Gargantext/Database/Action/Flow/Pairing.hs @@ -11,7 +11,6 @@ Portability : POSIX {-# OPTIONS_GHC -fno-warn-deprecations #-} -{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE Arrows #-} module Gargantext.Database.Action.Flow.Pairing @@ -25,28 +24,27 @@ import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Data.Set qualified as Set import Data.Text qualified as Text -import Gargantext.API.Ngrams.Tools +import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getRepo, groupNodesByNgrams, mapTermListRoot ) import Gargantext.API.Ngrams.Types (NgramsTerm(..)) -import Gargantext.Core -import Gargantext.Core.NodeStory (HasNodeStory) +import Gargantext.Core ( HasDBid(toDBid) ) +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.Text.Metrics.CharByChar (levenshtein) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Types (TableResult(..)) -import Gargantext.Core.Types.Main -import Gargantext.Database +import Gargantext.Core.Types.Main ( ListType(CandidateTerm, MapTerm) ) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser) -import Gargantext.Database.Admin.Config -import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..)) +import Gargantext.Database.Admin.Config ( userMaster ) +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_firstName, cw_lastName, hc_who ) -- (HyperdataContact(..)) import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId) -import Gargantext.Database.Query.Prelude (returnA, queryNodeNodeTable) +import Gargantext.Database.Prelude (Cmd, DBCmd, runOpaQuery) +import Gargantext.Database.Query.Prelude (returnA) import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node.Children (getAllContacts) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername) import Gargantext.Database.Query.Table.NodeContext_NodeContext (insertNodeContext_NodeContext) -import Gargantext.Database.Query.Table.NodeNode (insertNodeNode) -import Gargantext.Database.Prelude (Cmd, runOpaQuery) -import Gargantext.Database.Schema.Ngrams -- (NgramsType(..)) -import Gargantext.Database.Schema.Node +import Gargantext.Database.Query.Table.NodeNode +import Gargantext.Database.Schema.Node ( node_hyperdata, node_id, node_typename, queryNodeTable ) import Gargantext.Prelude hiding (sum) import Opaleye diff --git a/src/Gargantext/Database/Action/Flow/Types.hs b/src/Gargantext/Database/Action/Flow/Types.hs index 0577138529e2e2d69d24eb4f4ad68a3c0c298518..8193e8047137b99e0c47ac55c3f8e940e4d1220f 100644 --- a/src/Gargantext/Database/Action/Flow/Types.hs +++ b/src/Gargantext/Database/Action/Flow/Types.hs @@ -23,23 +23,23 @@ import Data.Aeson (ToJSON) import Data.Aeson.TH (deriveJSON) import Data.HashMap.Strict (HashMap) import Data.Swagger (ToSchema(..), genericDeclareNamedSchema) -import Gargantext.Core.Flow.Types -import Gargantext.Core.NodeStory -import Gargantext.Core.Text -import Gargantext.Core.Text.Corpus.API qualified as API -import Gargantext.Core.Text.Terms +import Gargantext.Core.Flow.Types ( UniqId ) +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) +import Gargantext.Core.Text ( HasText ) +import Gargantext.API.Admin.Orchestrator.Types qualified as API +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Core.Text.Terms ( ExtractNgramsT ) import Gargantext.Core.Types (HasValidationError, TermsCount) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument ) import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Prelude (CmdM) -import Gargantext.Database.Query.Table.Node.Document.Insert +import Gargantext.Database.Query.Table.Node.Document.Insert ( UniqParameters, InsertDb, ToNode, AddUniqId ) import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Tree.Error (HasTreeError) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Types (Indexed) import Gargantext.Prelude -import Gargantext.System.Logging +import Gargantext.System.Logging ( MonadLogger ) type FlowCmdM env err m = diff --git a/src/Gargantext/Database/Action/Flow/Utils.hs b/src/Gargantext/Database/Action/Flow/Utils.hs index cee9ef37df550c4f8411668bf75cd6ca340d71ad..76651b72771bc329f462985fd283dbf4b35ef67a 100644 --- a/src/Gargantext/Database/Action/Flow/Utils.hs +++ b/src/Gargantext/Database/Action/Flow/Utils.hs @@ -11,9 +11,6 @@ Portability : POSIX {-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE InstanceSigs #-} - - module Gargantext.Database.Action.Flow.Utils ( docNgrams , documentIdWithNgrams @@ -31,12 +28,13 @@ import Data.Text qualified as T import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.Core (Lang, toDBid) import Gargantext.Core.Flow.Types (UniqId, uniqId) +import Gargantext.Core.Text.Ngrams ( Ngrams, NgramsType ) import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText) import Gargantext.Core.Types (TermsCount) import Gargantext.Core.Utils (addTuples) import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, hd_abstract, hd_title) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_abstract, hd_title ) import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Prelude (DBCmd, DbCmd') import Gargantext.Database.Query.Table.ContextNodeNgrams @@ -44,8 +42,8 @@ import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId) import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..)) import Gargantext.Database.Schema.Context (context_hyperdata, context_id) -import Gargantext.Database.Schema.Ngrams -import Gargantext.Database.Types +import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..)) +import Gargantext.Database.Types ( Indexed(..), index ) import Gargantext.Prelude import Gargantext.Prelude.Crypto.Hash (Hash) diff --git a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs index c98b3d893045af4988259f1343e41c0f2705415e..c478bcfd6a226bc0b089f6b494f03be9ab72545b 100644 --- a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs +++ b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs @@ -27,11 +27,12 @@ import Database.PostgreSQL.Simple.ToField qualified as DPS import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types qualified as DPST import Gargantext.API.Ngrams.Types (NgramsTerm(..)) -import Gargantext.Core -import Gargantext.Data.HashMap.Strict.Utils as HM +import Gargantext.Core ( HasDBid(toDBid) ) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) +import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith ) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Prelude (DBCmd, runPGSQuery) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) +import Gargantext.Database.Schema.Ngrams () -- toDBid instance import Gargantext.Prelude -- | fst is size of Supra Corpus diff --git a/src/Gargantext/Database/Action/Metrics/TFICF.hs b/src/Gargantext/Database/Action/Metrics/TFICF.hs index 4af79a25cc7dab8a3483770c380e45c39223b226..d4401fbde1caee8adf2577a6770efd7ef4e17cd5 100644 --- a/src/Gargantext/Database/Action/Metrics/TFICF.hs +++ b/src/Gargantext/Database/Action/Metrics/TFICF.hs @@ -9,8 +9,6 @@ Portability : POSIX -} -{-# LANGUAGE QuasiQuotes #-} - module Gargantext.Database.Action.Metrics.TFICF where @@ -20,11 +18,11 @@ import Data.Set qualified as Set import Gargantext.API.Ngrams.Types import Gargantext.Core import Gargantext.Core.Text.Metrics.TFICF +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, {-getOccByNgramsOnlyFast,-} getOccByNgramsOnlyFast_withSample) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Query.Table.NodeContext (selectCountDocs) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Prelude {- diff --git a/src/Gargantext/Database/Action/Search.hs b/src/Gargantext/Database/Action/Search.hs index 2ccc2cc1147cec496af2e943c756aa197d8212f5..491d77c48dac53551b65373a09a7b28a10744c24 100644 --- a/src/Gargantext/Database/Action/Search.hs +++ b/src/Gargantext/Database/Action/Search.hs @@ -23,7 +23,7 @@ module Gargantext.Database.Action.Search ( import Control.Arrow (returnA) import Control.Lens ((^.), view) -import Data.BoolExpr +import Data.BoolExpr ( BoolExpr(..), Signed(Negative, Positive) ) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Profunctor.Product (p4) @@ -31,25 +31,26 @@ import Data.Set qualified as Set import Data.Text (unpack) import Data.Text qualified as T import Data.Time (UTCTime) -import Gargantext.Core +import Gargantext.Core ( Lang(EN), HasDBid(toDBid) ) import Gargantext.Core.Text.Corpus.Query qualified as API import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Types import Gargantext.Core.Types.Query (IsTrash, Limit, Offset) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..)) +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact(..) ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Prelude (runOpaQuery, runCountOpaQuery, DBCmd) import Gargantext.Database.Query.Facet -import Gargantext.Database.Query.Filter -import Gargantext.Database.Query.Table.Context +import Gargantext.Database.Query.Filter ( limit', offset' ) +import Gargantext.Database.Query.Table.Context ( queryContextSearchTable ) import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable) -import Gargantext.Database.Query.Table.Node +import Gargantext.Database.Query.Table.Node ( queryNodeSearchTable, defaultList ) import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Database.Query.Table.NodeContext -import Gargantext.Database.Query.Table.NodeContext_NodeContext +import Gargantext.Database.Schema.NodeContext_NodeContext ( NodeContext_NodeContextRead, queryNodeContext_NodeContextTable, ncnc_nodecontext2, ncnc_nodecontext1 ) import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..)) -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Node ( NodePolySearch(_ns_hyperdata, _ns_search, _ns_typename, _ns_id) ) import Gargantext.Prelude hiding (groupBy) import Opaleye hiding (Order) import Opaleye qualified as O hiding (Order) @@ -59,7 +60,7 @@ import Opaleye qualified as O hiding (Order) -- queryToTsSearch :: API.Query -> Field SqlTSQuery -queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST) +queryToTsSearch q = sqlToTSQuery $ T.unpack $ API.interpretQuery q transformAST where -- It's important to understand how things work under the hood: When we perform diff --git a/src/Gargantext/Database/Query/Table/Ngrams.hs b/src/Gargantext/Database/Query/Table/Ngrams.hs index 70bdf5ce539324a99d5e6a33543cb5db2ba2b9fc..c2b73e306cc290a3f3a16682b7ce2054ab5c1491 100644 --- a/src/Gargantext/Database/Query/Table/Ngrams.hs +++ b/src/Gargantext/Database/Query/Table/Ngrams.hs @@ -13,7 +13,6 @@ Portability : POSIX {-# LANGUAGE Arrows #-} {-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE TemplateHaskell #-} module Gargantext.Database.Query.Table.Ngrams ( module Gargantext.Database.Schema.Ngrams @@ -30,6 +29,7 @@ import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Data.Map.Strict qualified as Map import Database.PostgreSQL.Simple qualified as PGS +import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Types import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd) import Gargantext.Database.Query.Join (leftJoin3) diff --git a/src/Gargantext/Database/Query/Table/NgramsPostag.hs b/src/Gargantext/Database/Query/Table/NgramsPostag.hs index aa39f5069c91beee2c1102fbf957639495d8bb5c..d82eff9d7e98206c60c1477390e8d57e08ed6f7f 100644 --- a/src/Gargantext/Database/Query/Table/NgramsPostag.hs +++ b/src/Gargantext/Database/Query/Table/NgramsPostag.hs @@ -24,11 +24,12 @@ import Data.HashMap.Strict qualified as HashMap import Data.List qualified as List import Database.PostgreSQL.Simple qualified as PGS import Gargantext.Core -import Gargantext.Core.Types +import Gargantext.Core.Text.Ngrams (Ngrams, ngramsSize, ngramsTerms) +import Gargantext.Core.Types ( POS ) import Gargantext.Database.Prelude (runPGSQuery, runPGSQuery_, DBCmd) -import Gargantext.Database.Query.Table.Ngrams +import Gargantext.Database.Query.Table.Ngrams ( NgramsId, insertNgrams ) import Gargantext.Database.Schema.Prelude -import Gargantext.Database.Types +import Gargantext.Database.Types ( Indexed(Indexed) ) import Gargantext.Prelude data NgramsPostag = NgramsPostag { _np_lang :: !Lang @@ -87,7 +88,7 @@ insertNgramsPostag' :: [NgramsPostagInsert] -> DBCmd err [Indexed Text Int] insertNgramsPostag' ns = runPGSQuery queryInsertNgramsPostag (PGS.Only $ Values fields ns) where - fields = map (\t -> QualifiedIdentifier Nothing t) $ snd fields_name + fields = map (QualifiedIdentifier Nothing) $ snd fields_name fields_name :: ( [Text], [Text]) fields_name = ( ["lang_id", "algo_id", "postag", "form", "form_n", "lem" , "lem_n"] @@ -155,7 +156,7 @@ SELECT terms,id FROM ins_form_ret selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)] selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.Only $ Values fields datas) where - fields = map (\t -> QualifiedIdentifier Nothing t) ["int4","int4","text", "int4"] + fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"] datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns ---------------------- diff --git a/src/Gargantext/Database/Query/Table/NodeContext.hs b/src/Gargantext/Database/Query/Table/NodeContext.hs index cd1879ca36a2a8fbaef4baa6f04ccc3a740c5bf8..b13bba9b11aba45f992efe1d325e0c51f99e2248 100644 --- a/src/Gargantext/Database/Query/Table/NodeContext.hs +++ b/src/Gargantext/Database/Query/Table/NodeContext.hs @@ -50,13 +50,15 @@ import Data.Time (UTCTime) import Database.PostgreSQL.Simple qualified as PGS (In(..), Query, Only(..)) import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) -import Gargantext.Core -import Gargantext.Core.Types -import Gargantext.Database.Admin.Types.Hyperdata +import Gargantext.Core ( HasDBid(toDBid) ) +import Gargantext.Database.Admin.Types.Node +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_publication_date ) +import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata ) import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(..), nodeError) import Gargantext.Database.Prelude import Gargantext.Database.Schema.Context -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Ngrams () -- instances +import Gargantext.Database.Schema.Node ( node_id, node_typename, queryNodeTable, NodeRead ) import Gargantext.Database.Schema.NodeContext import Gargantext.Prelude import Gargantext.Prelude.Crypto.Hash (Hash) diff --git a/src/Gargantext/Database/Query/Table/NodeNgrams.hs b/src/Gargantext/Database/Query/Table/NodeNgrams.hs index d4ffadd8225d68624fc85ec50b89ef833bf2e399..307e0e852193741c418a0a527a63e997963d289b 100644 --- a/src/Gargantext/Database/Query/Table/NodeNgrams.hs +++ b/src/Gargantext/Database/Query/Table/NodeNgrams.hs @@ -16,7 +16,6 @@ NodeNgrams register Context of Ngrams (named Cgrams then) {-# LANGUAGE Arrows #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE TemplateHaskell #-} module Gargantext.Database.Query.Table.NodeNgrams ( getCgramsId @@ -32,9 +31,10 @@ import Data.Map.Strict qualified as Map import Data.Maybe (fromJust) import Database.PostgreSQL.Simple qualified as PGS (Query, Only(..)) import Gargantext.Core +import Gargantext.Core.Text.Ngrams (NgramsType) import Gargantext.Core.Types import Gargantext.Database.Prelude (DBCmd, runPGSQuery) -import Gargantext.Database.Schema.Ngrams (NgramsType, fromNgramsTypeId) +import Gargantext.Database.Schema.Ngrams (fromNgramsTypeId) import Gargantext.Database.Schema.NodeNgrams import Gargantext.Database.Schema.Prelude (Select, FromRow, sql, fromRow, toField, field, Values(..), QualifiedIdentifier(..), selectTable) import Gargantext.Prelude diff --git a/src/Gargantext/Database/Query/Table/NodeNode.hs b/src/Gargantext/Database/Query/Table/NodeNode.hs index 51cd03d0c73ecc0d4c2f57efd3b6991bc5f9782b..cba59f8f39e91daae1195794f2bc18ca5a88008e 100644 --- a/src/Gargantext/Database/Query/Table/NodeNode.hs +++ b/src/Gargantext/Database/Query/Table/NodeNode.hs @@ -39,10 +39,12 @@ import Data.Text (splitOn) import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) -import Gargantext.Core -import Gargantext.Core.Types -import Gargantext.Database.Admin.Types.Hyperdata +import Gargantext.Core ( HasDBid(toDBid) ) +import Gargantext.Database.Admin.Types.Node +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_publication_date ) +import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata ) import Gargantext.Database.Prelude (DBCmd, mkCmd, runPGSQuery, runCountOpaQuery, runOpaQuery) +import Gargantext.Database.Schema.Ngrams () import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.NodeNode import Gargantext.Prelude diff --git a/src/Gargantext/Database/Schema/Ngrams.hs b/src/Gargantext/Database/Schema/Ngrams.hs index e591b00394082c8df2a351eb3fb6e75fc07202ab..f74916d1405004b854c850e0a8adc4353e353cd1 100644 --- a/src/Gargantext/Database/Schema/Ngrams.hs +++ b/src/Gargantext/Database/Schema/Ngrams.hs @@ -20,26 +20,20 @@ Ngrams connection to the Database. module Gargantext.Database.Schema.Ngrams where -import Codec.Serialise (Serialise()) -import Control.Lens (over) -import Data.Aeson -import Data.Aeson.Types (toJSONKeyText) import Data.Bimap (Bimap) import Data.Bimap qualified as Bimap import Data.ByteString.Char8 qualified as B import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap -import Data.Text (splitOn, pack, strip) +import Data.Text (splitOn, strip) import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..)) import Gargantext.Core (HasDBid(..)) -import Gargantext.Core.Types (TODO(..), Typed(..)) +import Gargantext.Core.Text.Ngrams ( Ngrams(..), NgramsType(..), NgramsT ) +import Gargantext.Core.Types (Typed(..)) import Gargantext.Database.Schema.Prelude hiding (over) -import Gargantext.Database.Types +import Gargantext.Database.Types ( Indexed(Indexed) ) import Gargantext.Prelude -import Servant (FromHttpApiData(..), ToHttpApiData(..)) -import Test.QuickCheck (elements) -import Text.Read (read) type NgramsId = Int @@ -71,46 +65,6 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable } ) --- | Main Ngrams Types --- | Typed Ngrams --- Typed Ngrams localize the context of the ngrams --- ngrams in source field of document has Sources Type --- ngrams in authors field of document has Authors Type --- ngrams in text fields of documents has Terms Type (i.e. either title or abstract) -data NgramsType = Authors | Institutes | Sources | NgramsTerms - deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic) - -instance Serialise NgramsType -instance FromJSON NgramsType - where - parseJSON (String "Authors") = pure Authors - parseJSON (String "Institutes") = pure Institutes - parseJSON (String "Sources") = pure Sources - parseJSON (String "Terms") = pure NgramsTerms - parseJSON (String "NgramsTerms") = pure NgramsTerms - parseJSON _ = mzero - -instance FromJSONKey NgramsType where - fromJSONKey = FromJSONKeyTextParser (parseJSON . String) - -instance ToJSON NgramsType - where - toJSON Authors = String "Authors" - toJSON Institutes = String "Institutes" - toJSON Sources = String "Sources" - toJSON NgramsTerms = String "Terms" - -instance ToJSONKey NgramsType where - toJSONKey = toJSONKeyText (pack . show) -instance FromHttpApiData NgramsType where - parseUrlPiece n = pure $ (read . cs) n -instance ToHttpApiData NgramsType where - toUrlPiece = pack . show -instance ToParamSchema NgramsType where - toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) -instance Arbitrary NgramsType where - arbitrary = elements [ minBound .. maxBound ] - -- map NgramsType to its assigned id instance FromField NgramsType where fromField fld mdata = @@ -127,14 +81,19 @@ instance FromField NgramsType where instance ToField NgramsType where toField nt = toField $ toDBid nt +instance FromField Ngrams where + fromField fld mdata = do + x <- fromField fld mdata + pure $ text2ngrams x -ngramsTypes :: [NgramsType] -ngramsTypes = [minBound..] +instance PGS.ToRow Text where + toRow t = [toField t] + +text2ngrams :: Text -> Ngrams +text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt' + where + txt' = strip txt -instance ToSchema NgramsType -{- where - declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_") ---} newtype NgramsTypeId = NgramsTypeId Int deriving (Eq, Show, Ord, Num) @@ -179,46 +138,6 @@ instance HasDBid NgramsType where ------------------------------------------------------------------------ ------------------------------------------------------------------------ --- | TODO put it in Gargantext.Core.Text.Ngrams -data Ngrams = UnsafeNgrams { _ngramsTerms :: Text - , _ngramsSize :: Int - } - deriving (Generic, Show, Eq, Ord) - -instance Hashable Ngrams - -makeLenses ''Ngrams -instance PGS.ToRow Ngrams where - toRow (UnsafeNgrams t s) = [toField t, toField s] - -instance FromField Ngrams where - fromField fld mdata = do - x <- fromField fld mdata - pure $ text2ngrams x - -instance PGS.ToRow Text where - toRow t = [toField t] - -text2ngrams :: Text -> Ngrams -text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt' - where - txt' = strip txt - - ------------------------------------------------------------------------- -------------------------------------------------------------------------- --- | TODO put it in Gargantext.Core.Text.Ngrams --- Named entity are typed ngrams of Terms Ngrams -data NgramsT a = - NgramsT { _ngramsType :: NgramsType - , _ngramsT :: a - } deriving (Generic, Show, Eq, Ord) - -makeLenses ''NgramsT - -instance Functor NgramsT where - fmap = over ngramsT - ----------------------------------------------------------------------- withMap :: HashMap Text NgramsId -> Text -> NgramsId withMap m n = maybe (panicTrace $ "[G.D.S.Ngrams.withMap] Should not happen" <> (show n)) diff --git a/src/Gargantext/Database/Schema/NgramsPostag.hs b/src/Gargantext/Database/Schema/NgramsPostag.hs index e877bbad76b829d65747bef8a8a0960d5540bebd..51b7b57d864d123bcde8dc91d1b1978280f2debd 100644 --- a/src/Gargantext/Database/Schema/NgramsPostag.hs +++ b/src/Gargantext/Database/Schema/NgramsPostag.hs @@ -20,9 +20,9 @@ ngrams in NgramsTerm Lists. module Gargantext.Database.Schema.NgramsPostag where -import Control.Lens +import Control.Lens ( makeLenses ) import Database.PostgreSQL.Simple qualified as PGS -import Gargantext.Database.Schema.Prelude +import Gargantext.Database.Schema.Prelude ( Column, SqlInt4, SqlText, ToField(toField), toRow ) import Gargantext.Prelude diff --git a/src/Gargantext/Database/Schema/NodeNgrams.hs b/src/Gargantext/Database/Schema/NodeNgrams.hs index 3800bf1bb828bdd1af5305326b6f42c44f244037..819d8a8d7407f3e8762fa0290af09199bedec27d 100644 --- a/src/Gargantext/Database/Schema/NodeNgrams.hs +++ b/src/Gargantext/Database/Schema/NodeNgrams.hs @@ -20,8 +20,9 @@ NodeNgrams register Context of Ngrams (named Cgrams then) module Gargantext.Database.Schema.NodeNgrams where -import Gargantext.Core.Types -import Gargantext.Database.Schema.Ngrams (NgramsType) +import Gargantext.Core.Text.Ngrams (NgramsType) +import Gargantext.Core.Types.Main ( ListType ) +import Gargantext.Database.Admin.Types.Node ( NodeId ) import Gargantext.Database.Schema.Prelude import Gargantext.Prelude diff --git a/src/Gargantext/Utils/SpacyNLP.hs b/src/Gargantext/Utils/SpacyNLP.hs index 2696a9da27afe8e577bd213a12caaedd7d78c303..4cdc63a3215b78c1975569916440dcf497034f7b 100644 --- a/src/Gargantext/Utils/SpacyNLP.hs +++ b/src/Gargantext/Utils/SpacyNLP.hs @@ -13,7 +13,6 @@ Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server -} -{-# LANGUAGE TemplateHaskell #-} module Gargantext.Utils.SpacyNLP ( module Gargantext.Utils.SpacyNLP.Types @@ -24,9 +23,8 @@ module Gargantext.Utils.SpacyNLP ( ) where import Data.Aeson (encode) -import Data.Text hiding (map, group, filter, concat, zip) import Gargantext.Core (Lang(..)) -import Gargantext.Core.Text.Terms.Multi.PosTagging.Types +import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(PosSentences), Sentence(Sentence), Token(Token) ) import Gargantext.Prelude import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response) import Network.URI (URI(..)) @@ -42,22 +40,22 @@ spacyRequest uri txt = do ---------------------------------------------------------------- spacyTagsToToken :: SpacyTags -> Token -spacyTagsToToken st = Token (_spacyTags_index st) - (_spacyTags_normalized st) - (_spacyTags_text st) - (_spacyTags_lemma st) - (_spacyTags_head_index st) - (_spacyTags_char_offset st) - (Just $ _spacyTags_pos st) - (Just $ _spacyTags_ent_type st) - (Just $ _spacyTags_prefix st) - (Just $ _spacyTags_suffix st) +spacyTagsToToken st = + Token (_spacyTags_index st) + (_spacyTags_normalized st) + (_spacyTags_text st) + (_spacyTags_lemma st) + (_spacyTags_head_index st) + (_spacyTags_char_offset st) + (Just $ _spacyTags_pos st) + (Just $ _spacyTags_ent_type st) + (Just $ _spacyTags_prefix st) + (Just $ _spacyTags_suffix st) spacyDataToPosSentences :: SpacyData -> PosSentences spacyDataToPosSentences (SpacyData ds) = PosSentences - $ map (\(i, ts) -> Sentence i ts) - $ zip [1..] - $ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds + $ zipWith Sentence [1..] + (map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds) ----------------------------------------------------------------- diff --git a/test/Test/API/UpdateList.hs b/test/Test/API/UpdateList.hs index 83ba0ef297df1ce79cfe0754838f4e3ce07985e0..bd6c3a45b3ac3a55514ef67eeea432167dba7495 100644 --- a/test/Test/API/UpdateList.hs +++ b/test/Test/API/UpdateList.hs @@ -13,7 +13,7 @@ module Test.API.UpdateList ( , pollUntilFinished ) where -import Control.Lens ((^.), mapped, over, view) +import Control.Lens ((^.), mapped, over) import Control.Monad.Fail (fail) import Data.Aeson.QQ import Data.Map.Strict qualified as Map @@ -27,14 +27,13 @@ import Gargantext.API.Admin.Auth.Types (Token) import Gargantext.API.Ngrams qualified as APINgrams import Gargantext.API.Ngrams.List ( ngramsListFromCSVData ) import Gargantext.API.Ngrams.Types ( MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), Versioned(..), mSetToList, toNgramsPatch, ne_children, ne_ngrams, vc_data, _NgramsTable ) -import Gargantext.Core.NodeStory (hasNodeStory, nse_getter, HasNodeArchiveStoryImmediateSaver(..)) +import Gargantext.Core.Text.Ngrams import Gargantext.Core.Types ( CorpusId, ListId, ListType(..), NodeId, _NodeId ) import Gargantext.Core.Types.Individu import Gargantext.Database.Action.User import Gargantext.Database.Admin.Types.Hyperdata.Corpus import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Tree.Root -import Gargantext.Database.Schema.Ngrams import Gargantext.Prelude hiding (get) import Network.Wai.Handler.Warp qualified as Wai import Paths_gargantext (getDataFileName) diff --git a/test/Test/Database/Operations/NodeStory.hs b/test/Test/Database/Operations/NodeStory.hs index 800eaec5a79f0fc5fb654ba1f53f00457571cd5e..a1070243668882363f96f86e951900905290e2e1 100644 --- a/test/Test/Database/Operations/NodeStory.hs +++ b/test/Test/Database/Operations/NodeStory.hs @@ -24,6 +24,7 @@ import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, sav import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root) import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.Core.NodeStory +import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Types.Individu import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId) import Gargantext.Database.Action.User (getUserId) @@ -32,7 +33,6 @@ import Gargantext.Database.Prelude (runPGSQuery) import Gargantext.Database.Query.Table.Ngrams (selectNgramsId) import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Tree.Root -import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Prelude import Test.Database.Types