[ngrams] some refactoring: Gargantext.Core.Text.Ngrams module

This contains only data, removes some Database dependency in imports.
parent c72309dd
Pipeline #5800 passed with stages
in 100 minutes and 37 seconds
......@@ -34,6 +34,7 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
......@@ -42,7 +43,6 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory,doesFileExist)
......
......@@ -18,13 +18,13 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory)
......
......@@ -153,6 +153,7 @@ library
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Ngrams
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms
......
......@@ -19,11 +19,10 @@ add get
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams
......@@ -106,13 +105,13 @@ import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, Ngrams, insertNgrams, selectNgramsByDoc )
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, insertNgrams, selectNgramsByDoc )
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername )
......@@ -179,7 +178,7 @@ listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution :: TableNgrams.NgramsType
ngramsStatePatchConflictResolution :: NgramsType
-> NgramsTerm
-> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
......@@ -234,7 +233,7 @@ addListNgrams listId ngramsType nes = do
-- UNSAFE
setListNgrams :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
setListNgrams listId ngramsType ns = do
......@@ -357,7 +356,7 @@ commitStatePatch listId (Versioned _p_version p) = do
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
......@@ -487,7 +486,7 @@ tableNgramsPostChartsAsync utn jobHandle = do
getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
a <- getNodeStory nodeId
......@@ -498,7 +497,7 @@ getNgramsTableMap nodeId ngramsType = do
dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
m <- getNgramsTableMap nodeId ngramsType
......@@ -617,7 +616,7 @@ getNgramsTable' :: forall env err m.
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do
tableMap <- getNgramsTableMap listId ngramsType
......@@ -631,7 +630,7 @@ setNgramsTableScores :: forall env err m t.
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
......@@ -821,7 +820,7 @@ apiNgramsAsync _dId =
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
=> ListId -> NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
Versioned <$> currentVersion listId <*> pure True
......
......@@ -10,7 +10,6 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
......@@ -29,21 +28,21 @@ import Data.Text (concat, pack, splitOn)
import Data.Vector (Vector)
import Data.Vector qualified as Vec
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.Orchestrator.Types ( AsyncJobs, JobLog )
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer, GargM, serverError, HasServerError)
import Gargantext.API.Types
import Gargantext.Core.NodeStory
import Gargantext.API.Types (HTML)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude hiding (concat, toList)
......@@ -113,7 +112,7 @@ getCsv :: HasNodeStory env err m
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
getCsv lId = do
lst <- getNgramsList lId
pure $ case Map.lookup TableNgrams.NgramsTerms lst of
pure $ case Map.lookup NgramsTerms lst of
Nothing -> noHeader Map.empty
Just (Versioned { _v_data }) ->
addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
......
......@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Text.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
......@@ -61,7 +61,7 @@ toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl
where
toTerm = Text.splitOn " " . unNgramsTerm
(roots, children) = List.partition (\(_t, nre) -> view nre_root nre == Nothing)
(roots, children) = List.partition (\(_t, nre) -> isNothing (view nre_root nre))
$ List.filter (\(_t,nre) -> view nre_list nre == lt'') ns
roots' = map (\(t,nre) -> (t, map toTerm $ unMSet $ view nre_children nre )) roots
......
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use infix" #-}
module Gargantext.API.Ngrams.Tools
where
......@@ -24,11 +23,11 @@ import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
-- import GHC.Conc (TVar, readTVar)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm), NgramsRepoElement(_nre_root, _nre_list) )
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, ListId )
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
......
......@@ -37,12 +37,12 @@ import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Gargantext.Utils.Servant (CSV, ZIP)
......@@ -551,7 +551,7 @@ instance ToField NgramsTablePatch
where
toField = toJSONField
instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
instance FromField (PatchMap Ngrams.NgramsType (PatchMap NodeId NgramsTablePatch))
where
fromField = fromField'
......@@ -747,21 +747,21 @@ type RepoCmdM env err m =
-- Instances
instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
instance FromHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap))
where
parseUrlPiece x = maybeToEither x (decode $ cs x)
instance ToHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) where
instance ToHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap)) where
toUrlPiece m = cs (encode m)
ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
ngramsTypeFromTabType :: TabType -> Ngrams.NgramsType
ngramsTypeFromTabType tabType =
let here = "Garg.API.Ngrams: " :: Text in
case tabType of
Sources -> TableNgrams.Sources
Authors -> TableNgrams.Authors
Institutes -> TableNgrams.Institutes
Terms -> TableNgrams.NgramsTerms
Sources -> Ngrams.Sources
Authors -> Ngrams.Authors
Institutes -> Ngrams.Institutes
Terms -> Ngrams.NgramsTerms
_ -> panicTrace $ here <> "No Ngrams for this tab"
-- TODO: This `panic` would disapear with custom NgramsType.
......@@ -784,7 +784,7 @@ instance ToSchema UpdateTableNgramsCharts where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_utn_")
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
type NgramsList = (Map Ngrams.NgramsType (Versioned NgramsTableMap))
-- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name
......
......@@ -27,7 +27,9 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Types
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
......@@ -37,7 +39,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader)
......
......@@ -14,13 +14,13 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Export.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..) )
import Data.Text (Text)
import GHC.Generics (Generic)
import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
import Gargantext.Core.Types
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types ( ListId, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Servant
......
......@@ -9,9 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
......@@ -19,16 +18,17 @@ module Gargantext.API.Node.Update
import Control.Lens (view)
import Data.Aeson
import Data.Set qualified as Set
import Data.Swagger
import Data.Swagger ( ToSchema )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Types qualified as NgramsTypes
import Gargantext.API.Prelude (GargM, simuLogs)
import Gargantext.Core.Methods.Similarities (GraphMetric(..))
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
......@@ -38,20 +38,19 @@ import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire) )
import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude
import Gargantext.System.Logging ( MonadLogger )
import Gargantext.Utils.Aeson qualified as GUA
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Utils.UTCTime (timeMeasured)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Utils.UTCTime (timeMeasured)
import Gargantext.System.Logging
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------
type API = Summary " Update node according to NodeType params"
......
......@@ -14,8 +14,8 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Node
import Control.Lens ( Lens' )
import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
......
......@@ -73,10 +73,10 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.DB
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runPGSQuery )
......@@ -84,7 +84,7 @@ import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runP
getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
getNodeStory' c nId = do
--res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
-- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id).
-- Need to create a map: {<node_id>: {<ngrams_type_id>: {<ngrams_id>: <data>}}}
let dbData = map (\(version, ngramsType, ngrams, ngrams_repo_element) ->
......@@ -341,13 +341,13 @@ fixNodeStoryVersions = do
-- printDebug "[fixNodeStoryVersions] nIds" nIds
mapM_ (\(PGS.Only nId) -> do
-- printDebug "[fixNodeStoryVersions] nId" nId
updateVer c TableNgrams.Authors nId
updateVer c Ngrams.Authors nId
updateVer c TableNgrams.Institutes nId
updateVer c Ngrams.Institutes nId
updateVer c TableNgrams.Sources nId
updateVer c Ngrams.Sources nId
updateVer c TableNgrams.NgramsTerms nId
updateVer c Ngrams.NgramsTerms nId
pure ()
) nIds
......@@ -363,7 +363,7 @@ fixNodeStoryVersions = do
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ? |]
updateVer :: PGS.Connection -> TableNgrams.NgramsType -> Int64 -> IO ()
updateVer :: PGS.Connection -> Ngrams.NgramsType -> Int64 -> IO ()
updateVer c ngramsType nId = do
maxVer <- runPGSQuery c maxVerQuery (nId, ngramsType) :: IO [PGS.Only (Maybe Int64)]
case maxVer of
......
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory.DB
( nodeExists
......@@ -27,22 +26,20 @@ module Gargantext.Core.NodeStory.DB
where
import Control.Lens ((^.))
import Control.Monad.Except
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Core (toDBid)
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (NodeId(..), NodeType)
import Gargantext.Core.NodeStory.Types ( a_state, a_version, ArchiveList, ArchiveStateList, NgramsStatePatch' )
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Database.Admin.Types.Node ( NodeId(..), NodeType )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Schema.Ngrams ()
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database
......@@ -70,7 +67,7 @@ getNodesArchiveHistory :: PGS.Connection
-> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
getNodesArchiveHistory c nodesId = do
as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
:: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
:: IO [(Int, NgramsType, NgramsTerm, NgramsPatch)]
pure $ map (\(nId, ngramsType, terms, patch)
-> ( UnsafeMkNodeId nId
......@@ -96,11 +93,11 @@ insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory c nodeId version (h:hs) = do
let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
(\(term, p) ->
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, NgramsType, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nType, term, patch) -> do
[PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
pure (nId, nType, ngramsId, term, patch)
) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
) tuples :: IO [(NodeId, NgramsType, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
_ <- insertNodeArchiveHistory c nodeId version hs
pure ()
......
......@@ -56,10 +56,10 @@ import Data.Set qualified as Set
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node ( NodeId(..) )
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
......@@ -100,8 +100,8 @@ instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
type NgramsState' = Map Ngrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap Ngrams.NgramsType NgramsTablePatch
-- instance Serialise NgramsStatePatch'
instance FromField (Archive NgramsState' NgramsStatePatch')
where
......@@ -167,7 +167,7 @@ initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
ngramsTableMap = Map.singleton Ngrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
......@@ -231,8 +231,8 @@ class HasNodeArchiveStoryImmediateSaver env where
type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm)
type ArchiveStateList = [(Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (Ngrams.NgramsType, NgramsTerm)
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -46,11 +46,11 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Zip qualified as UZip
import Protolude
import Protolude ( show )
import System.FilePath (takeExtension)
------------------------------------------------------------------------
......
......@@ -34,6 +34,7 @@ import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Social ( FlowSocialListWith, flowSocialList )
import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont(FlowCont), flc_scores )
import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms)
import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
......@@ -45,7 +46,7 @@ import Gargantext.Database.Query.Table.NgramsPostag (selectLems)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..), text2ngrams)
import Gargantext.Database.Schema.Ngrams (text2ngrams)
import Gargantext.Prelude
{-
......@@ -118,8 +119,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize,
$ List.take maxListSize
$ List.sortOn (Down . viewScore . snd)
$ HashMap.toList tailTerms'
pure $ Map.fromList [( nt, List.take maxListSize $ toNgramsElement stopTerms
<> toNgramsElement mapTerms
<> toNgramsElement (setListType (Just MapTerm ) mapTerms')
......@@ -127,6 +127,14 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize,
)]
-- | https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169#note_10049
-- Stemming can be useful if you do not have any context: ok for full text search then.
--
-- In document, we have context so we can add grammar and linguistics
-- rules to be more precise than the stemmatization, that is why the
-- lemmatization is used here to group. Basically it will avoid
-- grouping homonyms in list. In search usually you add more context
-- to "control" the stemmatization approximation.
getGroupParams :: ( HasNodeError err
, HasTreeError err
)
......@@ -191,6 +199,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
-- void $ panicTrace $ "groupedWithList: " <> show groupedWithList
-- printDebug "[buildNgramsTermsList] socialLists" socialLists
-- printDebug "[buildNgramsTermsList] socialLists with scores" socialLists_Stemmed
-- printDebug "[buildNgramsTermsList] groupedWithList" groupedWithList
......
......@@ -25,11 +25,11 @@ import Data.HashSet qualified as Set
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types ( toNgramsPatch, NgramsPatch, NgramsTerm(..) )
import Gargantext.Core (Lang(..), Form, Lem, NLPServerConfig)
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Group.Prelude ( Stem )
import Gargantext.Core.Text.List.Social.Patch ( addScorePatch )
import Gargantext.Core.Text.List.Social.Prelude ( FlowCont, FlowListScores )
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Prelude
......@@ -43,7 +43,7 @@ addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
------------------------------------------------------------------------
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
newtype StopSize = StopSize {unStopSize :: Int}
deriving (Eq)
-- | TODO: group with 2 terms only can be
......@@ -80,7 +80,6 @@ groupWith (GroupParams { unGroupParams_lang = l }) t =
$ Text.splitOn " "
$ Text.replace "-" " "
$ unNgramsTerm t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag { _gwl_map = m }) t =
case HashMap.lookup (unNgramsTerm t) m of
......
......@@ -18,24 +18,23 @@ import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Pool
import Data.Swagger
import Data.Pool ( withResource )
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, defaultSchemaOptions )
import Data.Text qualified as T
import Data.Vector qualified as V
import GHC.Generics
import Gargantext.API.Ngrams.Types (NgramsTerm, NgramsPatch)
import Gargantext.Core.NodeStory (getNodesArchiveHistory)
import Gargantext.Core.Text.List.Social.Find (findListsId)
import Gargantext.Core.Text.List.Social.Patch (addScorePatches)
import Gargantext.Core.Text.List.Social.Prelude (FlowCont, FlowListScores)
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Individu (User)
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Prelude (DBCmd, connPool)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree (NodeMode(Private), HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Test.QuickCheck
import Test.QuickCheck ( Arbitrary(arbitrary), oneof, arbitraryBoundedEnum )
import Web.Internal.HttpApiData (ToHttpApiData, FromHttpApiData, parseUrlPiece, toUrlPiece)
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -14,14 +14,13 @@ module Gargantext.Core.Text.List.Social.Patch
import Control.Lens hiding (cons)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Patch.Class qualified as Patch (Replace(..))
import Gargantext.API.Ngrams.Prelude (unMSet, patchMSet_toList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types ( NgramsTerm, nre_children, nre_list, MSet, NgramsPatch(..) )
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Admin.Types.Node ( ListId )
import Gargantext.Prelude
addScorePatches :: NgramsType -> [ListId]
......@@ -40,7 +39,7 @@ addScorePatchesList :: NgramsType
addScorePatchesList nt repo fl lid =