Commit 72f5f35c authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/169-dev-singulars-plurals' into dev

parents bffbc619 445ba8f1
......@@ -34,6 +34,7 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
......@@ -42,7 +43,6 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory,doesFileExist)
......
......@@ -18,13 +18,13 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory)
......
......@@ -100,9 +100,9 @@ library
Gargantext.API
Gargantext.API.Admin.Auth.Types
Gargantext.API.Admin.EnvTypes
Gargantext.API.Admin.Orchestrator.Types
Gargantext.API.Admin.Settings
Gargantext.API.Admin.Settings.CORS
Gargantext.API.Admin.Orchestrator.Types
Gargantext.API.Admin.Types
Gargantext.API.Auth.PolicyCheck
Gargantext.API.Dev
......@@ -141,18 +141,21 @@ library
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Ngrams
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms
......@@ -170,8 +173,8 @@ library
Gargantext.Core.Types
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main
Gargantext.Core.Types.Query
Gargantext.Core.Types.Phylo
Gargantext.Core.Types.Query
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix
Gargantext.Core.Viz.Graph
......@@ -189,6 +192,7 @@ library
Gargantext.Core.Viz.Types
Gargantext.Database.Action.Flow
Gargantext.Database.Action.Flow.Types
Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Search
Gargantext.Database.Action.User
Gargantext.Database.Action.User.New
......@@ -205,8 +209,8 @@ library
Gargantext.Database.Query.Table.Node
Gargantext.Database.Query.Table.Node.Error
Gargantext.Database.Query.Table.Node.UpdateOpaleye
Gargantext.Database.Query.Tree.Root
Gargantext.Database.Query.Table.User
Gargantext.Database.Query.Tree.Root
Gargantext.Database.Schema.Ngrams
Gargantext.Database.Schema.Node
Gargantext.Database.Schema.User
......@@ -309,11 +313,9 @@ library
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Gargantext.Core.Text.Learn
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group
Gargantext.Core.Text.List.Group.Prelude
Gargantext.Core.Text.List.Group.WithScores
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Learn
Gargantext.Core.Text.List.Merge
Gargantext.Core.Text.List.Social
......@@ -365,7 +367,6 @@ library
Gargantext.Database.Action.Metrics
Gargantext.Database.Action.Metrics.Lists
Gargantext.Database.Action.Metrics.NgramsByContext
Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Node
Gargantext.Database.Action.Share
Gargantext.Database.Action.TSQuery
......
......@@ -19,11 +19,10 @@ add get
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams
......@@ -106,13 +105,13 @@ import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, Ngrams, insertNgrams, selectNgramsByDoc )
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, insertNgrams, selectNgramsByDoc )
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername )
......@@ -179,7 +178,7 @@ listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution :: TableNgrams.NgramsType
ngramsStatePatchConflictResolution :: NgramsType
-> NgramsTerm
-> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
......@@ -234,7 +233,7 @@ addListNgrams listId ngramsType nes = do
-- UNSAFE
setListNgrams :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
setListNgrams listId ngramsType ns = do
......@@ -357,7 +356,7 @@ commitStatePatch listId (Versioned _p_version p) = do
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
......@@ -487,7 +486,7 @@ tableNgramsPostChartsAsync utn jobHandle = do
getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
a <- getNodeStory nodeId
......@@ -498,7 +497,7 @@ getNgramsTableMap nodeId ngramsType = do
dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
m <- getNgramsTableMap nodeId ngramsType
......@@ -617,7 +616,7 @@ getNgramsTable' :: forall env err m.
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do
tableMap <- getNgramsTableMap listId ngramsType
......@@ -631,7 +630,7 @@ setNgramsTableScores :: forall env err m t.
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
......@@ -821,7 +820,7 @@ apiNgramsAsync _dId =
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
=> ListId -> NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
Versioned <$> currentVersion listId <*> pure True
......
......@@ -10,7 +10,6 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
......@@ -29,21 +28,21 @@ import Data.Text (concat, pack, splitOn)
import Data.Vector (Vector)
import Data.Vector qualified as Vec
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.Orchestrator.Types ( AsyncJobs, JobLog )
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer, GargM, serverError, HasServerError)
import Gargantext.API.Types
import Gargantext.Core.NodeStory
import Gargantext.API.Types (HTML)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude hiding (concat, toList)
......@@ -113,7 +112,7 @@ getCsv :: HasNodeStory env err m
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
getCsv lId = do
lst <- getNgramsList lId
pure $ case Map.lookup TableNgrams.NgramsTerms lst of
pure $ case Map.lookup NgramsTerms lst of
Nothing -> noHeader Map.empty
Just (Versioned { _v_data }) ->
addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
......
......@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Text.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
......@@ -61,7 +61,7 @@ toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl
where
toTerm = Text.splitOn " " . unNgramsTerm
(roots, children) = List.partition (\(_t, nre) -> view nre_root nre == Nothing)
(roots, children) = List.partition (\(_t, nre) -> isNothing (view nre_root nre))
$ List.filter (\(_t,nre) -> view nre_list nre == lt'') ns
roots' = map (\(t,nre) -> (t, map toTerm $ unMSet $ view nre_children nre )) roots
......
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use infix" #-}
module Gargantext.API.Ngrams.Tools
where
......@@ -24,11 +23,11 @@ import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
-- import GHC.Conc (TVar, readTVar)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm), NgramsRepoElement(_nre_root, _nre_list) )
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, ListId )
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
......
......@@ -37,12 +37,12 @@ import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Gargantext.Utils.Servant (CSV, ZIP)
......@@ -551,7 +551,7 @@ instance ToField NgramsTablePatch
where
toField = toJSONField
instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
instance FromField (PatchMap Ngrams.NgramsType (PatchMap NodeId NgramsTablePatch))
where
fromField = fromField'
......@@ -747,21 +747,21 @@ type RepoCmdM env err m =
-- Instances
instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
instance FromHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap))
where
parseUrlPiece x = maybeToEither x (decode $ cs x)
instance ToHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) where
instance ToHttpApiData (Map Ngrams.NgramsType (Versioned NgramsTableMap)) where
toUrlPiece m = cs (encode m)
ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
ngramsTypeFromTabType :: TabType -> Ngrams.NgramsType
ngramsTypeFromTabType tabType =
let here = "Garg.API.Ngrams: " :: Text in
case tabType of
Sources -> TableNgrams.Sources
Authors -> TableNgrams.Authors
Institutes -> TableNgrams.Institutes
Terms -> TableNgrams.NgramsTerms
Sources -> Ngrams.Sources
Authors -> Ngrams.Authors
Institutes -> Ngrams.Institutes
Terms -> Ngrams.NgramsTerms
_ -> panicTrace $ here <> "No Ngrams for this tab"
-- TODO: This `panic` would disapear with custom NgramsType.
......@@ -784,7 +784,7 @@ instance ToSchema UpdateTableNgramsCharts where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_utn_")
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
type NgramsList = (Map Ngrams.NgramsType (Versioned NgramsTableMap))
-- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name
......
......@@ -27,7 +27,9 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Types
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
......@@ -37,7 +39,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader)
......
......@@ -14,13 +14,13 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Export.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..) )
import Data.Text (Text)
import GHC.Generics (Generic)
import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
import Gargantext.Core.Types
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types ( ListId, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Servant
......
......@@ -9,9 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
......@@ -19,16 +18,17 @@ module Gargantext.API.Node.Update
import Control.Lens (view)
import Data.Aeson
import Data.Set qualified as Set
import Data.Swagger
import Data.Swagger ( ToSchema )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Types qualified as NgramsTypes
import Gargantext.API.Prelude (GargM, simuLogs)
import Gargantext.Core.Methods.Similarities (GraphMetric(..))
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
......@@ -38,20 +38,19 @@ import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire) )
import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude
import Gargantext.System.Logging ( MonadLogger )
import Gargantext.Utils.Aeson qualified as GUA
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Utils.UTCTime (timeMeasured)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Utils.UTCTime (timeMeasured)
import Gargantext.System.Logging
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------
type API = Summary " Update node according to NodeType params"
......
......@@ -14,8 +14,8 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Node
import Control.Lens ( Lens' )
import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
......
......@@ -73,10 +73,10 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.DB
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runPGSQuery )
......@@ -84,7 +84,7 @@ import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runP
getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
getNodeStory' c nId = do
--res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
-- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id).
-- Need to create a map: {<node_id>: {<ngrams_type_id>: {<ngrams_id>: <data>}}}
let dbData = map (\(version, ngramsType, ngrams, ngrams_repo_element) ->
......@@ -235,9 +235,9 @@ fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed
)
) <$> nsChildren
-- | Sometimes, when we upload a new list, a child can be left without
-- a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'.
-- | (#281) Sometimes, when we upload a new list, a child can be left
-- without a parent. Find such ngrams and set their 'root' and
-- 'parent' to 'Nothing'.
fixChildrenWithNoParent :: NgramsState' -> NgramsState'
fixChildrenWithNoParent ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where
......@@ -341,13 +341,13 @@ fixNodeStoryVersions = do
-- printDebug "[fixNodeStoryVersions] nIds" nIds
mapM_ (\(PGS.Only nId) -> do
-- printDebug "[fixNodeStoryVersions] nId" nId
updateVer c TableNgrams.Authors nId
updateVer c Ngrams.Authors nId
updateVer c TableNgrams.Institutes nId
updateVer c Ngrams.Institutes nId
updateVer c TableNgrams.Sources nId
updateVer c Ngrams.Sources nId
updateVer c TableNgrams.NgramsTerms nId
updateVer c Ngrams.NgramsTerms nId
pure ()
) nIds
......@@ -363,7 +363,7 @@ fixNodeStoryVersions = do
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ? |]
updateVer :: PGS.Connection -> TableNgrams.NgramsType -> Int64 -> IO ()
updateVer :: PGS.Connection -> Ngrams.NgramsType -> Int64 -> IO ()
updateVer c ngramsType nId = do
maxVer <- runPGSQuery c maxVerQuery (nId, ngramsType) :: IO [PGS.Only (Maybe Int64)]
case maxVer of
......
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory.DB
( nodeExists
......@@ -27,22 +26,20 @@ module Gargantext.Core.NodeStory.DB
where
import Control.Lens ((^.))
import Control.Monad.Except
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Core (toDBid)
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (NodeId(..), NodeType)
import Gargantext.Core.NodeStory.Types ( a_state, a_version, ArchiveList, ArchiveStateList, NgramsStatePatch' )
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Database.Admin.Types.Node ( NodeId(..), NodeType )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Schema.Ngrams ()
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database
......@@ -70,7 +67,7 @@ getNodesArchiveHistory :: PGS.Connection
-> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
getNodesArchiveHistory c nodesId = do
as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
:: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
:: IO [(Int, NgramsType, NgramsTerm, NgramsPatch)]
pure $ map (\(nId, ngramsType, terms, patch)
-> ( UnsafeMkNodeId nId
......@@ -96,11 +93,11 @@ insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory c nodeId version (h:hs) = do
let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
(\(term, p) ->
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, NgramsType, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nType, term, patch) -> do
[PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
pure (nId, nType, ngramsId, term, patch)
) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
) tuples :: IO [(NodeId, NgramsType, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
_ <- insertNodeArchiveHistory c nodeId version hs
pure ()
......
......@@ -56,10 +56,10 @@ import Data.Set qualified as Set
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node ( NodeId(..) )
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
......@@ -100,8 +100,8 @@ instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
type NgramsState' = Map Ngrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap Ngrams.NgramsType NgramsTablePatch
-- instance Serialise NgramsStatePatch'
instance FromField (Archive NgramsState' NgramsStatePatch')
where
......@@ -167,7 +167,7 @@ initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
ngramsTableMap = Map.singleton Ngrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
......@@ -231,8 +231,8 @@ class HasNodeArchiveStoryImmediateSaver env where
type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm)
type ArchiveStateList = [(Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (Ngrams.NgramsType, NgramsTerm)
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -46,11 +46,11 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Zip qualified as UZip
import Protolude
import Protolude ( show )
import System.FilePath (takeExtension)
------------------------------------------------------------------------
......
This diff is collapsed.
......@@ -9,11 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group
where
......@@ -23,8 +20,8 @@ import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores' )
import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont )
import Gargantext.Prelude
------------------------------------------------------------------------
toGroupedTree :: (Ord a, Monoid a, HasSize a)
......@@ -43,9 +40,7 @@ setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
-> HashMap NgramsTerm (GroupedTreeScores b)
setScoresWithMap m = setScoresWith (score m)
where
score m' t = case HashMap.lookup t m' of
Nothing -> mempty
Just r -> r
score m' t = fromMaybe mempty (HashMap.lookup t m')
setScoresWith :: (Ord a, Ord b)
=> (NgramsTerm -> b)
......@@ -58,8 +53,7 @@ setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f)
)
-}
setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k
, _gts'_children = setScoresWith f
$ view gts'_children v
}
)
, _gts'_children = setScoresWith f $ view gts'_children v
}
)
------------------------------------------------------------------------
......@@ -25,11 +25,11 @@ import Data.HashSet qualified as Set
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types ( toNgramsPatch, NgramsPatch, NgramsTerm(..) )
import Gargantext.Core (Lang(..), Form, Lem, NLPServerConfig)
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Group.Prelude ( Stem )
import Gargantext.Core.Text.List.Social.Patch ( addScorePatch )
import Gargantext.Core.Text.List.Social.Prelude ( FlowCont, FlowListScores )
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Prelude
......@@ -43,8 +43,8 @@ addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
------------------------------------------------------------------------
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
deriving (Eq)
newtype StopSize = StopSize {unStopSize :: Int}
deriving (Eq, Show)
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
......@@ -61,7 +61,7 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, _gwl_nlp_config :: !NLPServerConfig
, _gwl_map :: !(HashMap Form Lem)
}
deriving (Eq)
deriving (Eq, Show)
------------------------------------------------------------------------
groupWith :: GroupParams
......@@ -80,7 +80,6 @@ groupWith (GroupParams { unGroupParams_lang = l }) t =
$ Text.splitOn " "
$ Text.replace "-" " "
$ unNgramsTerm t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag { _gwl_map = m }) t =
case HashMap.lookup (unNgramsTerm t) m of
......
......@@ -18,24 +18,23 @@ import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Pool
import Data.Swagger
import Data.Pool ( withResource )
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, defaultSchemaOptions )
import Data.Text qualified as T
import Data.Vector qualified as V
import GHC.Generics
import Gargantext.API.Ngrams.Types (NgramsTerm, NgramsPatch)
import Gargantext.Core.NodeStory (getNodesArchiveHistory)
import Gargantext.Core.NodeStory.DB ( getNodesArchiveHistory )
import Gargantext.Core.Text.List.Social.Find (findListsId)
import Gargantext.Core.Text.List.Social.Patch (addScorePatches)
import Gargantext.Core.Text.List.Social.Prelude (FlowCont, FlowListScores)
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Individu (User)
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Prelude (DBCmd, connPool)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree (NodeMode(Private), HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Test.QuickCheck
import Test.QuickCheck ( Arbitrary(arbitrary), oneof, arbitraryBoundedEnum )
import Web.Internal.HttpApiData (ToHttpApiData, FromHttpApiData, parseUrlPiece, toUrlPiece)
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -14,14 +14,13 @@ module Gargantext.Core.Text.List.Social.Patch
import Control.Lens hiding (cons)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Patch.Class qualified as Patch (Replace(..))
import Gargantext.API.Ngrams.Prelude (unMSet, patchMSet_toList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types ( NgramsTerm, nre_children, nre_list, MSet, NgramsPatch(..) )
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Admin.Types.Node ( ListId )
import Gargantext.Prelude
addScorePatches :: NgramsType -> [ListId]
......@@ -40,7 +39,7 @@ addScorePatchesList :: NgramsType
addScorePatchesList nt repo fl lid =
foldl' addScorePatch fl patches
where
patches = maybe [] (List.concat . (map HashMap.toList)) patches'
patches = maybe [] (concatMap HashMap.toList) patches'
patches' = do
lists <- Map.lookup lid repo
......
{-|
Module : Gargantext.Core.Text.Ngrams
Description : Main Ngrams types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Ngrams
where
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Data.Aeson ( ToJSON(..), FromJSON(..), FromJSONKey(..), FromJSONKeyFunction(..), Value(String), ToJSONKey(..) )
import Data.Aeson.Types (toJSONKeyText)
import Data.Text (pack)
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Schema.Prelude hiding (over)
import Gargantext.Prelude
import Servant (FromHttpApiData(..), ToHttpApiData(..))
import Test.QuickCheck (elements)
import Text.Read (read)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
where
parseJSON (String "Authors") = pure Authors
parseJSON (String "Institutes") = pure Institutes
parseJSON (String "Sources") = pure Sources
parseJSON (String "Terms") = pure NgramsTerms
parseJSON (String "NgramsTerms") = pure NgramsTerms
parseJSON _ = mzero
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
where
toJSON Authors = String "Authors"
toJSON Institutes = String "Institutes"
toJSON Sources = String "Sources"
toJSON NgramsTerms = String "Terms"
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance Arbitrary NgramsType where
arbitrary = elements [ minBound .. maxBound ]
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
instance ToSchema NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
, _ngramsSize :: Int
}
deriving (Generic, Show, Eq, Ord)
instance Hashable Ngrams
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
toRow (UnsafeNgrams t s) = [toField t, toField s]
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- Named entity are typed ngrams of Terms Ngrams
data NgramsT a =
NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic, Show, Eq, Ord)
makeLenses ''NgramsT
instance Functor NgramsT where
fmap = over ngramsT
......@@ -37,27 +37,27 @@ compute graph
module Gargantext.Core.Text.Terms
where
import Control.Lens
import Control.Lens ( (^.), view, over, makeLenses )
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Data.Traversable
import GHC.Base (String)
import Gargantext.Core
import Gargantext.Core ( Lang, NLPServerConfig, PosTagAlgo )
import Gargantext.Core.Text (sentences, HasText(..))
import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms)
import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
import Gargantext.Core.Text.Terms.Mono (monoTerms)
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types
import Gargantext.Core.Types ( TermsCount, POS, Terms(Terms), TermsWithCount )
import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag, np_form, np_lem)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams, NgramsId)
import Gargantext.Database.Schema.Ngrams (text2ngrams, NgramsId)
import Gargantext.Prelude
data TermType lang
......@@ -103,8 +103,7 @@ withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. }
$ fmap toToken
$ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
$ concatMap hasText ns
just_m -> just_m
withLang l _ = l
......@@ -126,7 +125,11 @@ class ExtractNgramsT h
------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms ng1 ng2) =
NgramsPostag l pa po form lem
NgramsPostag { _np_lang = l
, _np_algo = pa
, _np_postag = po
, _np_form = form
, _np_lem = lem }
where
form = text2ngrams $ Text.intercalate " " ng1
lem = text2ngrams $ Text.intercalate " " $ Set.toList ng2
......@@ -138,7 +141,7 @@ cleanNgrams s ng
| otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms))
cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams
cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ (cleanNgrams s) ng
cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ cleanNgrams s ng
cleanExtractedNgrams s (EnrichedNgrams ng) = EnrichedNgrams $ over np_form (cleanNgrams s)
$ over np_lem (cleanNgrams s) ng
......@@ -156,8 +159,7 @@ insertExtractedNgrams ngs = do
m2 <- insertNgramsPostag (map unEnrichedNgrams e)
--printDebug "terms" m2
let result = HashMap.union m1 m2
pure result
pure $ HashMap.union m1 m2
isSimpleNgrams :: ExtractedNgrams -> Bool
isSimpleNgrams (SimpleNgrams _) = True
......@@ -189,10 +191,10 @@ type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount]
termsUnsupervised (Unsupervised { _tt_model = Nothing }) = panicTrace "[termsUnsupervised] no model"
termsUnsupervised (Unsupervised { _tt_model = Just _tt_model, .. }) =
map (\(t, cnt) -> (text2term _tt_lang t, cnt))
map (first (text2term _tt_lang))
. groupWithCounts
-- . List.nub
. (List.filter (\l' -> List.length l' >= _tt_windowSize))
. List.filter (\l' -> List.length l' >= _tt_windowSize)
. List.concat
. mainEleveWith _tt_model _tt_ngramsSize
. uniText
......@@ -200,19 +202,18 @@ termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
newTries n t = buildTries n (toToken <$> uniText t)
-- | TODO removing long terms > 24
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- TODO get sentences according to lang
. Text.toLower
uniText = map (List.filter (not . isPunctuation) . tokenize)
. sentences -- TODO get sentences according to lang
. Text.toLower
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang PorterAlgorithm) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
isPunctuation x = List.elem x $ Text.pack . pure
<$> ("!?(),;.:" :: String)
......@@ -11,25 +11,21 @@ Multi-terms are ngrams where n > 1.
-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake, tokenTagsWith, tokenTags, cleanTextForNLP)
where
import Control.Applicative
import Data.Attoparsec.Text as DAT
import Data.Text hiding (map, group, filter, concat)
import Data.Attoparsec.Text as DAT ( digit, space, notChar, string )
import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo(..))
import Gargantext.Core.Text.Terms.Multi.Lang.En qualified as En
import Gargantext.Core.Text.Terms.Multi.Lang.Fr qualified as Fr
import Gargantext.Core.Text.Terms.Multi.PosTagging
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Text.Terms.Multi.PosTagging ( corenlp, tokens2tokensTags )
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(_sentences), Sentence(_sentenceTokens) )
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import Gargantext.Core.Types
import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(TokenTag, _my_token_pos) )
import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Prelude
import Gargantext.Utils.SpacyNLP qualified as SpacyNLP
import Replace.Attoparsec.Text as RAT
import Replace.Attoparsec.Text as RAT ( streamEdit )
-------------------------------------------------------------------
type NLP_API = Lang -> Text -> IO PosSentences
......
......@@ -16,7 +16,7 @@ group the tokens into extracted terms.
module Gargantext.Core.Text.Terms.Multi.Group (group2)
where
import Gargantext.Core.Types
import Gargantext.Core.Types ( POS, TokenTag(TokenTag) )
import Gargantext.Prelude
-- | FIXME p1 and p2 not really taken into account
......
......@@ -17,8 +17,8 @@ module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
where
import Gargantext.Prelude
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.Group
import Gargantext.Core.Types ( POS(CC, IN, DT, NP, JJ), TokenTag )
import Gargantext.Core.Text.Terms.Multi.Group ( group2 )
------------------------------------------------------------------------
-- | Rule grammar to group tokens
......@@ -31,8 +31,7 @@ groupTokens ntags = group2 NP NP
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
$ ntags
$ group2 JJ CC ntags
------------------------------------------------------------------------
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
......
......@@ -9,32 +9,31 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Chart
where
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict (toList)
import Data.Set qualified as Set
import Data.Vector qualified as V
import Gargantext.API.Ngrams.NgramsTree
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.API.Ngrams.NgramsTree ( toTree, NgramsTree )
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getListNgrams, getRepo, mapTermListRoot )
import Gargantext.API.Ngrams.Types ( NgramsTerm(NgramsTerm) )
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types
import Gargantext.Core.Viz.Types
import Gargantext.Database.Action.Metrics.NgramsByContext
import Gargantext.Database.Admin.Config
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node ( NodeType(NodeList), CorpusId, contextId2NodeId )
import Gargantext.Core.Viz.Types ( Histo(Histo) )
import Gargantext.Database.Action.Metrics.NgramsByContext ( countContextsByNgramsWith, getContextsByNgramsOnlyUser )
import Gargantext.Database.Admin.Config ( userMaster )
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node ( getListsWithParentId )
import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername )
import Gargantext.Database.Query.Table.NodeContext (selectDocsDates)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node ( NodePoly(_node_id) )
import Gargantext.Prelude hiding (toList)
import qualified Data.Set as Set
histoData :: CorpusId -> DBCmd err Histo
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Graph
where
......@@ -19,9 +17,9 @@ import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.HashMap.Strict (HashMap, lookup)
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Types (NgramsTerm(..), NgramsRepoElement(..), mSetToList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Graph.Types
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Text.Read qualified as Text
......
......@@ -19,31 +19,31 @@ module Gargantext.Core.Viz.Graph.API
where
import Control.Lens (set, (^.), _Just, (^?), at)
import Data.Aeson
import Data.Aeson ( ToJSON, FromJSON )
import Data.HashMap.Strict qualified as HashMap
import Data.Swagger
import Data.Swagger ( ToSchema )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.Orchestrator.Types ( JobLog )
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Prelude
import Gargantext.API.Prelude (GargM, GargServer)
import Gargantext.Core.Methods.Similarities (Similarity(..), GraphMetric(..), withMetric)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main
import Gargantext.Core.NodeStory.Types ( HasNodeStory, a_version, unNodeStory, NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Core.Viz.Graph.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Config ( userMaster )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node ( getOrMkList, getNodeWith, defaultList, getClosestParentIdByType )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername )
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node (node_hyperdata, node_name)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
......
......@@ -11,39 +11,40 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Viz.Graph.Tools
where
import Data.Aeson
import Data.Aeson ( ToJSON, FromJSON )
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.HashSet qualified as HashSet
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Swagger hiding (items)
import Data.Swagger ( ToSchema )
import Data.Text qualified as Text
import Data.Vector.Storable qualified as Vec
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
import Gargantext.Core.Statistics
import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, {-recursiveClustering,-} recursiveClustering', setNodes2clusterNodes)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..))
import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Graph.BAC.ProxemyOptim qualified as BAC
import Graph.Types (ClusterNode)
import IGraph qualified as Igraph
import IGraph.Algorithms.Layout qualified as Layout
import IGraph.Random -- (Gen(..))
import IGraph.Random ( Gen ) -- (Gen(..))
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
data PartitionMethod = Spinglass | Confluence | Infomap
deriving (Generic, Eq, Ord, Enum, Bounded, Show)
......
......@@ -24,12 +24,11 @@ import Data.Text (pack)
import Database.PostgreSQL.Simple.FromField (FromField(..))
import Gargantext.API.Ngrams.Types (NgramsTerm)
import Gargantext.Core.Methods.Similarities (GraphMetric)
import Gargantext.Core.Types (ListId)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude (Hyperdata)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Admin.Types.Node (ListId, NodeId)
import Gargantext.Database.Prelude (fromField')
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
import Test.QuickCheck (elements)
......
......@@ -11,18 +11,17 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo.API.Tools
where
import Control.Lens hiding (Context)
import Control.Lens (to, view)
import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict')
import Data.ByteString.Lazy qualified as Lazy
import Data.Map.Strict qualified as Map
import Data.Proxy
import Data.Set qualified as Set
import Data.Text (pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
......@@ -30,31 +29,31 @@ import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (withDefaultLanguage, Lang)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core.Types (Context, nodeId2ContextId)
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..), HyperdataCorpus(..))
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..) )
import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ContextId, PhyloId, nodeId2ContextId)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _context_id) )
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.UTCTime (timeMeasured)
import Prelude qualified
import System.FilePath ((</>))
import System.IO.Temp (withTempDirectory)
import System.Process qualified as Shell
import Gargantext.Utils.UTCTime (timeMeasured)
--------------------------------------------------------------------
getPhyloData :: HasNodeError err
......
......@@ -13,30 +13,30 @@ Portability : POSIX
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import Control.Lens hiding (Level)
import Control.Lens (to, view)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Proxy
import Data.Set qualified as Set
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (HasDBid, withDefaultLanguage)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList ( buildPatterns, termsInText, Patterns )
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node ( NodeType, CorpusId )
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(_hd_abstract, _hd_publication_year) )
import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith)
import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node ( node_hyperdata )
import Gargantext.Prelude hiding (to)
type MinSizeBranch = Int
......
......@@ -51,7 +51,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import Conduit
import Control.Lens hiding (elements, Indexed)
import Control.Lens ( (^.), to, view, over )
import Data.Bifunctor qualified as B
import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources)
......@@ -60,56 +60,57 @@ import Data.Conduit.List qualified as CList
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Proxy
import Data.Set qualified as Set
import Data.Text qualified as T
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.Core (Lang(..), NLPServerConfig)
import Gargantext.Core (withDefaultLanguage)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms), Ngrams(_ngramsTerms))
import Gargantext.Core (Lang(..), NLPServerConfig, withDefaultLanguage)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Group.WithStem (GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Types.Main ( CorpusName, ListType(MapTerm) )
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.List ( flowList_DbRepo, toNodeNgramsW' )
import Gargantext.Database.Action.Flow.Types ( do_api, DataOrigin(..), DataText(..), FlowCorpus )
import Gargantext.Database.Action.Flow.Utils (docNgrams, documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(toHyperdataDocument) )
import Gargantext.Database.Admin.Types.Node hiding (DEBUG) -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude (DbCmd', DBCmd, hasConfig)
import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.ContextNodeNgrams2 ( ContextNodeNgrams2Poly(..), insertContextNodeNgrams2 )
import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotExists, getOrMkList, getNodeWith )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, text2ngrams )
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Config (GargConfig(..))
import Gargantext.System.Logging
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import Gargantext.System.Logging ( logLocM, LogLevel(DEBUG), MonadLogger )
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import PUBMED.Types qualified as PUBMED
------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree (HasTreeError)
import Gargantext.Database.Query.Tree.Error ( HasTreeError )
------------------------------------------------------------------------
......@@ -182,11 +183,11 @@ flowDataText u (DataOld ids) tt cid mfslw _ = do
_ <- Doc.add userCorpusId (map nodeId2ContextId ids)
flowCorpusUser (_tt_lang tt) u userCorpusId listId corpusType mfslw
where
corpusType = (Nothing :: Maybe HyperdataCorpus)
corpusType = Nothing :: Maybe HyperdataCorpus
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw jobHandle = do
$(logLocM) DEBUG $ T.pack $ "Found " <> show mLen <> " new documents to process"
for_ (mLen <&> fromInteger) (`addMoreSteps` jobHandle)
flowCorpus u (Right [cid]) tt mfslw (fromMaybe 0 mLen, (transPipe liftBase txtC)) jobHandle
flowCorpus u (Right [cid]) tt mfslw (fromMaybe 0 mLen, transPipe liftBase txtC) jobHandle
------------------------------------------------------------------------
-- TODO use proxy
......@@ -199,13 +200,13 @@ flowAnnuaire :: ( DbCmd' env err m
, MonadJobStatus m )
=> User
-> Either CorpusName [CorpusId]
-> (TermType Lang)
-> TermType Lang
-> FilePath
-> JobHandle m
-> m AnnuaireId
flowAnnuaire u n l filePath jobHandle = do
-- TODO Conduit for file
docs <- liftBase $ ((readFile_Annuaire filePath) :: IO [HyperdataContact])
docs <- liftBase $ (readFile_Annuaire filePath :: IO [HyperdataContact])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing (fromIntegral $ length docs, yieldMany docs) jobHandle
------------------------------------------------------------------------
......@@ -362,10 +363,11 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do
_ <- reIndexWith userCorpusId listId NgramsTerms (Set.singleton MapTerm)
_ <- updateContextScore userCorpusId listId
_ <- updateNgramsOccurrences userCorpusId listId
pure userCorpusId
-- | This function is responsible for contructing terms.
buildSocialList :: ( HasNodeError err
, HasValidationError err
, HasNLPServer env
......@@ -389,8 +391,12 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw
$ GroupWithPosTag l nlpServer HashMap.empty
-- let gp = GroupParams { unGroupParams_lang = l
-- , unGroupParams_len = 10
-- , unGroupParams_limit = 10
-- , unGroupParams_stopSize = StopSize 10 }
let gp = GroupWithPosTag l nlpServer HashMap.empty
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
-- printDebug "flowCorpusUser:ngs" ngs
......@@ -425,7 +431,7 @@ insertMasterDocs ncs c lang hs = do
(extractNgramsT ncs $ withLang lang documentsWithId)
(map (B.first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId
lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs'
......@@ -445,13 +451,13 @@ saveDocNgramsWith lId mapNgramsDocs' = do
-- new
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ map (bimap _ngramsTerms Map.keys)
$ HashMap.toList mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let ngrams2insert = catMaybes [ ContextNodeNgrams2 <$> Just (nodeId2ContextId nId)
<*> (getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms''))
let ngrams2insert = catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
<$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
......@@ -498,5 +504,5 @@ reIndexWith cId lId nt lts = do
$ map (docNgrams corpusLang nt ts) docs
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
mapM_ (saveDocNgramsWith lId) ngramsByDoc
pure ()
......@@ -20,15 +20,17 @@ module Gargantext.Database.Action.Flow.Extract
import Control.Lens ((^.), _Just, view)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as DM
import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP))
import Gargantext.Core (Lang, NLPServerConfig(server))
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact, HyperdataDocument, cw_lastName, hc_who, hd_authors, hd_bdd, hd_institutes, hd_source)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams ( text2ngrams )
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
......@@ -49,6 +51,9 @@ instance ExtractNgramsT HyperdataContact
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT :: NLPServerConfig
......@@ -72,9 +77,8 @@ instance ExtractNgramsT HyperdataDocument
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ doc ^. hd_authors
termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt))
<$> concat
<$> liftBase (extractTerms ncs lang $ hasText doc)
termsWithCounts' <- map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
......
......@@ -11,8 +11,6 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.List
where
......@@ -27,12 +25,11 @@ import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory, a_history, a_state, a_version)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types (HasValidationError(..), assertValid)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb,{- getCgramsId -})
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (toList)
-- FLOW LIST
......@@ -169,7 +166,7 @@ listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-- the repo, they will be ignored.
putListNgrams :: (HasValidationError err, HasNodeStory env err m)
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> [NgramsElement]
-> m ()
putListNgrams _ _ [] = pure ()
......@@ -179,7 +176,7 @@ putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
putListNgrams' :: (HasValidationError err, HasNodeStory env err m)
=> NodeId
-> TableNgrams.NgramsType
-> NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
putListNgrams' listId ngramsType' ns = do
......
......@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Action.Flow.Pairing
......@@ -25,28 +24,27 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getRepo, groupNodesByNgrams, mapTermListRoot )
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Metrics.CharByChar (levenshtein)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main
import Gargantext.Database
import Gargantext.Core.Types.Main ( ListType(CandidateTerm, MapTerm) )
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
import Gargantext.Database.Admin.Config ( userMaster )
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_firstName, cw_lastName, hc_who ) -- (HyperdataContact(..))
import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
import Gargantext.Database.Query.Prelude (returnA, queryNodeNodeTable)
import Gargantext.Database.Prelude (Cmd, DBCmd, runOpaQuery)
import Gargantext.Database.Query.Prelude (returnA)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext_NodeContext (insertNodeContext_NodeContext)
import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
import Gargantext.Database.Prelude (Cmd, runOpaQuery)
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Schema.Node ( node_hyperdata, node_id, node_typename, queryNodeTable )
import Gargantext.Prelude hiding (sum)
import Opaleye
......
......@@ -23,23 +23,23 @@ import Data.Aeson (ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.HashMap.Strict (HashMap)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Gargantext.Core.Flow.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Terms
import Gargantext.Core.Flow.Types ( UniqId )
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text ( HasText )
import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms ( ExtractNgramsT )
import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Node.Document.Insert
import Gargantext.Database.Query.Table.Node.Document.Insert ( UniqParameters, InsertDb, ToNode, AddUniqId )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Types (Indexed)
import Gargantext.Prelude
import Gargantext.System.Logging
import Gargantext.System.Logging ( MonadLogger )
type FlowCmdM env err m =
......
......@@ -11,9 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.Utils
( docNgrams
, documentIdWithNgrams
......@@ -31,12 +28,13 @@ import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core (Lang, toDBid)
import Gargantext.Core.Flow.Types (UniqId, uniqId)
import Gargantext.Core.Text.Ngrams ( Ngrams, NgramsType )
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Utils (addTuples)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, hd_abstract, hd_title)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_abstract, hd_title )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (DBCmd, DbCmd')
import Gargantext.Database.Query.Table.ContextNodeNgrams
......@@ -44,8 +42,8 @@ import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Schema.Context (context_hyperdata, context_id)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..))
import Gargantext.Database.Types ( Indexed(..), index )
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
......
......@@ -27,11 +27,12 @@ import Database.PostgreSQL.Simple.ToField qualified as DPS
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Database.PostgreSQL.Simple.Types qualified as DPST
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith )
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Ngrams () -- toDBid instance
import Gargantext.Prelude
-- | fst is size of Supra Corpus
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics.TFICF
where
......@@ -20,11 +18,11 @@ import Data.Set qualified as Set
import Gargantext.API.Ngrams.Types
import Gargantext.Core
import Gargantext.Core.Text.Metrics.TFICF
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, {-getOccByNgramsOnlyFast,-} getOccByNgramsOnlyFast_withSample)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.NodeContext (selectCountDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
{-
......
......@@ -23,7 +23,7 @@ module Gargantext.Database.Action.Search (
import Control.Arrow (returnA)
import Control.Lens ((^.), view)
import Data.BoolExpr
import Data.BoolExpr ( BoolExpr(..), Signed(Negative, Positive) )
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Profunctor.Product (p4)
......@@ -31,25 +31,26 @@ import Data.Set qualified as Set
import Data.Text (unpack)
import Data.Text qualified as T
import Data.Time (UTCTime)
import Gargantext.Core
import Gargantext.Core ( Lang(EN), HasDBid(toDBid) )
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (IsTrash, Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Prelude (runOpaQuery, runCountOpaQuery, DBCmd)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Filter ( limit', offset' )
import Gargantext.Database.Query.Table.Context ( queryContextSearchTable )
import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node ( queryNodeSearchTable, defaultList )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Table.NodeContext
import Gargantext.Database.Query.Table.NodeContext_NodeContext
import Gargantext.Database.Schema.NodeContext_NodeContext ( NodeContext_NodeContextRead, queryNodeContext_NodeContextTable, ncnc_nodecontext2, ncnc_nodecontext1 )
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node ( NodePolySearch(_ns_hyperdata, _ns_search, _ns_typename, _ns_id) )
import Gargantext.Prelude hiding (groupBy)
import Opaleye hiding (Order)
import Opaleye qualified as O hiding (Order)
......@@ -59,7 +60,7 @@ import Opaleye qualified as O hiding (Order)
--
queryToTsSearch :: API.Query -> Field SqlTSQuery
queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST)
queryToTsSearch q = sqlToTSQuery $ T.unpack $ API.interpretQuery q transformAST
where
-- It's important to understand how things work under the hood: When we perform
......
......@@ -13,7 +13,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.Ngrams
( module Gargantext.Database.Schema.Ngrams
......@@ -30,6 +29,7 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd)
import Gargantext.Database.Query.Join (leftJoin3)
......
......@@ -24,11 +24,12 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Text.Ngrams (Ngrams, ngramsSize, ngramsTerms)
import Gargantext.Core.Types ( POS )
import Gargantext.Database.Prelude (runPGSQuery, runPGSQuery_, DBCmd)
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Ngrams ( NgramsId, insertNgrams )
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types
import Gargantext.Database.Types ( Indexed(Indexed) )
import Gargantext.Prelude
data NgramsPostag = NgramsPostag { _np_lang :: !Lang
......@@ -87,7 +88,7 @@ insertNgramsPostag' :: [NgramsPostagInsert] -> DBCmd err [Indexed Text Int]
insertNgramsPostag' ns = runPGSQuery queryInsertNgramsPostag (PGS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) $ snd fields_name
fields = map (QualifiedIdentifier Nothing) $ snd fields_name
fields_name :: ( [Text], [Text])
fields_name = ( ["lang_id", "algo_id", "postag", "form", "form_n", "lem" , "lem_n"]
......@@ -155,7 +156,7 @@ SELECT terms,id FROM ins_form_ret
selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.Only $ Values fields datas)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["int4","int4","text", "int4"]
fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
----------------------
......
......@@ -50,13 +50,15 @@ import Data.Time (UTCTime)
import Database.PostgreSQL.Simple qualified as PGS (In(..), Query, Only(..))
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_publication_date )
import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams () -- instances
import Gargantext.Database.Schema.Node ( node_id, node_typename, queryNodeTable, NodeRead )
import Gargantext.Database.Schema.NodeContext
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
......
......@@ -16,7 +16,6 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNgrams
( getCgramsId
......@@ -32,9 +31,10 @@ import Data.Map.Strict qualified as Map
import Data.Maybe (fromJust)
import Database.PostgreSQL.Simple qualified as PGS (Query, Only(..))
import Gargantext.Core
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (NgramsType, fromNgramsTypeId)
import Gargantext.Database.Schema.Ngrams (fromNgramsTypeId)
import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Schema.Prelude (Select, FromRow, sql, fromRow, toField, field, Values(..), QualifiedIdentifier(..), selectTable)
import Gargantext.Prelude
......
......@@ -39,10 +39,12 @@ import Data.Text (splitOn)
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_publication_date )
import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( Hyperdata )
import Gargantext.Database.Prelude (DBCmd, mkCmd, runPGSQuery, runCountOpaQuery, runOpaQuery)
import Gargantext.Database.Schema.Ngrams ()
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.NodeNode
import Gargantext.Prelude
......
......@@ -20,26 +20,20 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams
where
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.Bimap (Bimap)
import Data.Bimap qualified as Bimap
import Data.ByteString.Char8 qualified as B
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Text (splitOn, pack, strip)
import Data.Text (splitOn, strip)
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Gargantext.Core (HasDBid(..))
import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Core.Text.Ngrams ( Ngrams(..), NgramsType(..), NgramsT )
import Gargantext.Core.Types (Typed(..))
import Gargantext.Database.Schema.Prelude hiding (over)
import Gargantext.Database.Types
import Gargantext.Database.Types ( Indexed(Indexed) )
import Gargantext.Prelude
import Servant (FromHttpApiData(..), ToHttpApiData(..))
import Test.QuickCheck (elements)
import Text.Read (read)
type NgramsId = Int
......@@ -71,46 +65,6 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
}
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
where
parseJSON (String "Authors") = pure Authors
parseJSON (String "Institutes") = pure Institutes
parseJSON (String "Sources") = pure Sources
parseJSON (String "Terms") = pure NgramsTerms
parseJSON (String "NgramsTerms") = pure NgramsTerms
parseJSON _ = mzero
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
where
toJSON Authors = String "Authors"
toJSON Institutes = String "Institutes"
toJSON Sources = String "Sources"
toJSON NgramsTerms = String "Terms"
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance Arbitrary NgramsType where
arbitrary = elements [ minBound .. maxBound ]
-- map NgramsType to its assigned id
instance FromField NgramsType where
fromField fld mdata =
......@@ -127,14 +81,19 @@ instance FromField NgramsType where
instance ToField NgramsType where
toField nt = toField $ toDBid nt
instance FromField Ngrams where
fromField fld mdata = do
x <- fromField fld mdata
pure $ text2ngrams x
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
instance PGS.ToRow Text where
toRow t = [toField t]
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where
txt' = strip txt
instance ToSchema NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
newtype NgramsTypeId = NgramsTypeId Int
deriving (Eq, Show, Ord, Num)
......@@ -179,46 +138,6 @@ instance HasDBid NgramsType where
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
, _ngramsSize :: Int
}
deriving (Generic, Show, Eq, Ord)
instance Hashable Ngrams
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
toRow (UnsafeNgrams t s) = [toField t, toField s]
instance FromField Ngrams where
fromField fld mdata = do
x <- fromField fld mdata
pure $ text2ngrams x
instance PGS.ToRow Text where
toRow t = [toField t]
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where
txt' = strip txt
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams
data NgramsT a =
NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic, Show, Eq, Ord)
makeLenses ''NgramsT
instance Functor NgramsT where
fmap = over ngramsT
-----------------------------------------------------------------------
withMap :: HashMap Text NgramsId -> Text -> NgramsId
withMap m n = maybe (panicTrace $ "[G.D.S.Ngrams.withMap] Should not happen" <> (show n))
......
......@@ -20,9 +20,9 @@ ngrams in NgramsTerm Lists.
module Gargantext.Database.Schema.NgramsPostag
where
import Control.Lens
import Control.Lens ( makeLenses )
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.Prelude ( Column, SqlInt4, SqlText, ToField(toField), toRow )
import Gargantext.Prelude
......
......@@ -20,8 +20,9 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
module Gargantext.Database.Schema.NodeNgrams where
import Gargantext.Core.Types
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude
......
......@@ -13,7 +13,6 @@ Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP (
module Gargantext.Utils.SpacyNLP.Types
......@@ -24,9 +23,8 @@ module Gargantext.Utils.SpacyNLP (
) where
import Data.Aeson (encode)
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(PosSentences), Sentence(Sentence), Token(Token) )
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
import Network.URI (URI(..))
......@@ -42,22 +40,22 @@ spacyRequest uri txt = do
----------------------------------------------------------------
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st = Token (_spacyTags_index st)
(_spacyTags_normalized st)
(_spacyTags_text st)
(_spacyTags_lemma st)
(_spacyTags_head_index st)
(_spacyTags_char_offset st)
(Just $ _spacyTags_pos st)
(Just $ _spacyTags_ent_type st)
(Just $ _spacyTags_prefix st)
(Just $ _spacyTags_suffix st)
spacyTagsToToken st =
Token (_spacyTags_index st)
(_spacyTags_normalized st)
(_spacyTags_text st)
(_spacyTags_lemma st)
(_spacyTags_head_index st)
(_spacyTags_char_offset st)
(Just $ _spacyTags_pos st)
(Just $ _spacyTags_ent_type st)
(Just $ _spacyTags_prefix st)
(Just $ _spacyTags_suffix st)
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData ds) = PosSentences
$ map (\(i, ts) -> Sentence i ts)
$ zip [1..]
$ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
$ zipWith Sentence [1..]
(map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds)
-----------------------------------------------------------------
......
......@@ -13,7 +13,7 @@ module Test.API.UpdateList (
, pollUntilFinished
) where
import Control.Lens ((^.), mapped, over, view)
import Control.Lens ((^.), mapped, over)
import Control.Monad.Fail (fail)
import Data.Aeson.QQ
import Data.Map.Strict qualified as Map
......@@ -27,14 +27,13 @@ import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Ngrams qualified as APINgrams
import Gargantext.API.Ngrams.List ( ngramsListFromCSVData )
import Gargantext.API.Ngrams.Types ( MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), Versioned(..), mSetToList, toNgramsPatch, ne_children, ne_ngrams, vc_data, _NgramsTable )
import Gargantext.Core.NodeStory (hasNodeStory, nse_getter, HasNodeArchiveStoryImmediateSaver(..))
import Gargantext.Core.Text.Ngrams
import Gargantext.Core.Types ( CorpusId, ListId, ListType(..), NodeId, _NodeId )
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.User
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName)
......
......@@ -24,6 +24,7 @@ import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, sav
import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root)
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId)
import Gargantext.Database.Action.User (getUserId)
......@@ -32,7 +33,6 @@ import Gargantext.Database.Prelude (runPGSQuery)
import Gargantext.Database.Query.Table.Ngrams (selectNgramsId)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Test.Database.Types
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment