Commit 6b0ddc61 authored by Alexandre Delanoë

Merge branch 'testing' into stable

parents bc86389a f56e8fc8
Pipeline #6047 passed with stages in 177 minutes and 51 seconds
@@ -5,7 +5,7 @@ cabal-version: 3.4
 -- see: https://github.com/sol/hpack
 name: gargantext
-version: 0.0.7
+version: 0.0.7.1.3
 synopsis: Search, map, share
 description: Please see README.md
 category: Data
@@ -133,6 +133,7 @@ library
     Gargantext.API.Node.Corpus.Update
     Gargantext.API.Node.File
     Gargantext.API.Node.Share
+    Gargantext.API.Node.ShareURL
     Gargantext.API.Node.Update
     Gargantext.API.Prelude
     Gargantext.API.Routes
......
@@ -70,6 +70,7 @@ data Query m
   , languages :: m [GQLNLP.LanguageTuple]
   , nodes :: GQLNode.NodeArgs -> m [GQLNode.Node]
   , nodes_corpus :: GQLNode.CorpusArgs -> m [GQLNode.Corpus]
+  , node_children :: GQLNode.NodeChildrenArgs -> m [GQLNode.Node]
   , node_parent :: GQLNode.NodeParentArgs -> m [GQLNode.Node]
   , user_infos :: GQLUserInfo.UserInfoArgs -> m [GQLUserInfo.UserInfo]
   , users :: GQLUser.UserArgs -> m [GQLUser.User m]
@@ -121,6 +122,7 @@ rootResolver authenticatedUser policyManager =
   , languages = GQLNLP.resolveLanguages
   , nodes = GQLNode.resolveNodes authenticatedUser policyManager
   , nodes_corpus = GQLNode.resolveNodesCorpus
+  , node_children = GQLNode.resolveNodeChildren
   , node_parent = GQLNode.resolveNodeParent
   , user_infos = GQLUserInfo.resolveUserInfos authenticatedUser policyManager
   , users = GQLUser.resolveUsers authenticatedUser policyManager
......
@@ -14,23 +14,21 @@ Portability : POSIX
 module Gargantext.API.GraphQL.Node where

-import Data.Aeson
+import Data.Aeson ( Result(..), Value(..) )
 import Data.Aeson.KeyMap qualified as KM
 import Data.Morpheus.Types ( GQLType )
-import Data.Text qualified as T
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck ( nodeChecks, AccessPolicyManager )
 import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
-import Gargantext.API.GraphQL.Types
-import Gargantext.Core
+import Gargantext.API.GraphQL.Types ( GqlM )
+import Gargantext.Core ( HasDBid(lookupDBid) )
 import Gargantext.Database.Admin.Types.Node (NodeType)
 import Gargantext.Database.Admin.Types.Node qualified as NN
 import Gargantext.Database.Prelude (CmdCommon) -- , JSONB)
-import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNode)
+import Gargantext.Database.Query.Table.Node (getClosestChildrenByType, getClosestParentIdByType, getNode)
 import Gargantext.Database.Schema.Node qualified as N
 import Gargantext.Prelude
 import PUBMED.Types qualified as PUBMED
-import Prelude qualified

 data Corpus = Corpus
   { id :: Int
@@ -89,7 +87,13 @@ dbNodesCorpus corpus_id = do
 data NodeParentArgs
   = NodeParentArgs
   { node_id :: Int
-  , parent_type :: Text
+  , parent_type :: NodeType
+  } deriving (Generic, GQLType)
+
+data NodeChildrenArgs
+  = NodeChildrenArgs
+  { node_id :: Int
+  , child_type :: NodeType
   } deriving (Generic, GQLType)

 resolveNodeParent
@@ -97,16 +101,21 @@ resolveNodeParent
   => NodeParentArgs -> GqlM e env [Node]
 resolveNodeParent NodeParentArgs { node_id, parent_type } = dbParentNodes node_id parent_type

+resolveNodeChildren
+  :: (CmdCommon env)
+  => NodeChildrenArgs -> GqlM e env [Node]
+resolveNodeChildren NodeChildrenArgs { node_id, child_type } = dbChildNodes node_id child_type
+
 dbParentNodes
   :: (CmdCommon env)
-  => Int -> Text -> GqlM e env [Node]
-dbParentNodes node_id parent_type = do
-  let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
-  case mParentType of
-    Left err -> do
-      lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
-      pure []
-    Right parentType -> do
-      mNodeId <- lift $ getClosestParentIdByType (NN.UnsafeMkNodeId node_id) parentType -- (fromNodeTypeId parent_type_id)
+  => Int -> NodeType -> GqlM e env [Node]
+dbParentNodes node_id parentType = do
+  -- let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
+  -- case mParentType of
+  --   Left err -> do
+  --     lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
+  --     pure []
+  --   Right parentType -> do
+  mNodeId <- lift $ getClosestParentIdByType (NN.UnsafeMkNodeId node_id) parentType -- (fromNodeTypeId parent_type_id)
   case mNodeId of
     Nothing -> pure []
@@ -114,6 +123,13 @@ dbParentNodes node_id parent_type = do
       node <- lift $ getNode id
       pure [toNode node]

+dbChildNodes :: (CmdCommon env)
+             => Int -> NodeType -> GqlM e env [Node]
+dbChildNodes node_id childType = do
+  childIds <- lift $ getClosestChildrenByType (NN.UnsafeMkNodeId node_id) childType -- (fromNodeTypeId parent_type_id)
+  children <- lift $ mapM getNode childIds
+  pure $ toNode <$> children
+
 toNode :: NN.Node json -> Node
 toNode N.Node { .. } = Node { id = nid
                             , name = _node_name
......
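Aside: the new children lookup composes exactly like the existing parent lookup, just fanning out over a list of ids. A minimal usage sketch from the Haskell side (the node id and child type below are invented for illustration):

    -- Hypothetical: fetch the corpus children of node 42 inside the GraphQL monad.
    exampleChildren :: (CmdCommon env) => GqlM e env [Node]
    exampleChildren = resolveNodeChildren
      NodeChildrenArgs { node_id = 42, child_type = NN.NodeCorpus }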
@@ -3,11 +3,11 @@ module Gargantext.API.GraphQL.PolicyCheck where
 import Prelude

-import Control.Monad.Except
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
-import Gargantext.API.Errors.Types
-import Gargantext.API.GraphQL.Types
+import Control.Monad.Except (MonadError(..), MonadTrans(..))
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck ( BoolExpr, AccessCheck, AccessPolicyManager(..), AccessResult(..))
+import Gargantext.API.Errors.Types ( BackendInternalError(..) )
+import Gargantext.API.GraphQL.Types (GqlM)
 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)

 withPolicy :: (HasConnectionPool env, HasConfig env)
......
@@ -15,15 +15,15 @@ Portability : POSIX
 module Gargantext.API.GraphQL.TreeFirstLevel where

 import Data.Morpheus.Types (GQLType)
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
-import Gargantext.API.GraphQL.PolicyCheck
-import Gargantext.API.GraphQL.Types
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck (AccessPolicyManager, nodeChecks)
+import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
+import Gargantext.API.GraphQL.Types ( GqlM )
 import Gargantext.Core (fromDBid)
-import Gargantext.Core.Types (Tree, NodeTree, NodeType)
-import Gargantext.Core.Types.Main ( Tree(TreeN), _tn_node, _tn_children, NodeTree(NodeTree, _nt_id, _nt_type), _nt_name )
+-- import Gargantext.Core.Types (ContextId, CorpusId, ListId)
+import Gargantext.Core.Types.Main ( Tree(..), _tn_node, _tn_children, NodeTree(..), _nt_name )
 import Gargantext.Database.Admin.Config ()
-import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId (UnsafeMkNodeId))
+import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId(..), NodeType)
 import Gargantext.Database.Admin.Types.Node qualified as NN
 import Gargantext.Database.Prelude (CmdCommon)
 import Gargantext.Database.Query.Table.Node (getNode)
@@ -51,7 +51,8 @@ data TreeFirstLevel m = TreeFirstLevel
   , parent :: m (Maybe TreeNode)
   , children :: [TreeNode]
   } deriving (Generic, GQLType)
+
 data BreadcrumbArgs = BreadcrumbArgs
   {
     node_id :: Int
@@ -105,31 +106,33 @@ resolveParent Nothing = pure Nothing
 nodeToTreeNode :: HasCallStack => NN.Node json -> Maybe TreeNode
-nodeToTreeNode N.Node {..} = if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
-  then
-    Just TreeNode { id        = NN.unNodeId _node_id
-                  , name      = _node_name
-                  , node_type = fromDBid _node_typename
-                  , parent_id = NN.unNodeId <$> _node_parent_id
-                  }
-  else
-    Nothing
+nodeToTreeNode N.Node {..} =
+  if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
+  then
+    Just TreeNode { id        = NN.unNodeId _node_id
+                  , name      = _node_name
+                  , node_type = fromDBid _node_typename
+                  , parent_id = NN.unNodeId <$> _node_parent_id
+                  }
+  else
+    Nothing

-resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env (BreadcrumbInfo)
+resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env BreadcrumbInfo
 resolveBreadcrumb BreadcrumbArgs { node_id } = dbRecursiveParents node_id

 convertDbTreeToTreeNode :: HasCallStack => T.DbTreeNode -> TreeNode
-convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } = TreeNode
-  { name      = _dt_name
-  , id        = NN.unNodeId _dt_nodeId
-  , node_type = fromDBid _dt_typeId
-  , parent_id = NN.unNodeId <$> _dt_parentId
-  }
+convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } =
+  TreeNode
+    { name      = _dt_name
+    , id        = NN.unNodeId _dt_nodeId
+    , node_type = fromDBid _dt_typeId
+    , parent_id = NN.unNodeId <$> _dt_parentId
+    }

-dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env (BreadcrumbInfo)
-dbRecursiveParents node_id = do
-  let nId = UnsafeMkNodeId node_id
+dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env BreadcrumbInfo
+dbRecursiveParents nodeId = do
+  let nId = UnsafeMkNodeId nodeId
   dbParents <- lift $ T.recursiveParents nId allNodeTypes
   let treeNodes = map convertDbTreeToTreeNode dbParents
-  let breadcrumbInfo = BreadcrumbInfo { parents = treeNodes }
-  pure breadcrumbInfo
+  pure $ BreadcrumbInfo { parents = treeNodes }
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API.Node.ShareURL where
import Data.Text
import Gargantext.Prelude
import Gargantext.API.Prelude
import Servant
import Gargantext.Core.Types (NodeType, NodeId, unNodeId)
import Gargantext.Database.Prelude (HasConfig (hasConfig), CmdCommon)
import Control.Lens.Getter (view)
import Gargantext.Prelude.Config (gc_url)
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors (BackendInternalError)
type API = Summary "Fetch URL for sharing a node"
:> QueryParam "type" NodeType
:> QueryParam "id" NodeId
:> Get '[JSON] Text
api :: ServerT API (GargM Env BackendInternalError)
api = getUrl
getUrl :: (CmdCommon env) =>
Maybe NodeType -> Maybe NodeId -> GargM env BackendInternalError Text
getUrl nt id = do
-- TODO add check that the node is able to be shared (in a shared folder)
case nt of
Nothing -> pure "Invalid node Type"
Just t ->
case id of
Nothing -> pure "Invalid node ID"
Just i -> do
url <- view $ hasConfig . gc_url
pure $ url <> "/#/share/" <> show t <> "/" <> show (unNodeId i)
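For orientation, a hedged sketch of what the new endpoint returns. The base URL and id below are invented, and this assumes NodeType's derived Show instance plus the Text-valued show from Gargantext.Prelude:

    -- Pure restatement of the handler's last line (hypothetical values):
    -- shareUrl base t i = base <> "/#/share/" <> show t <> "/" <> show (unNodeId i)
    -- shareUrl "https://dev.example.org" NodeCorpus (UnsafeMkNodeId 1234)
    --   == "https://dev.example.org/#/share/NodeCorpus/1234"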
@@ -43,6 +43,7 @@ import Gargantext.API.Node.Corpus.Export.Types qualified as CorpusExport
 import Gargantext.API.Node.Corpus.New qualified as New
 import Gargantext.API.Node.Document.Export qualified as DocumentExport
 import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
+import Gargantext.API.Node.ShareURL qualified as ShareURL
 import Gargantext.API.Prelude
 import Gargantext.API.Public qualified as Public
 import Gargantext.Core.Types.Individu (User(..))
@@ -222,6 +223,7 @@ type GargPrivateAPI' =
   :<|> List.GETAPI
   :<|> List.JSONAPI
   :<|> List.CSVAPI
+  :<|> "shareurl" :> ShareURL.API
 {-
   :<|> "wait" :> Summary "Wait test"
        :> Capture "x" Int
@@ -305,6 +307,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
   :<|> List.getApi
   :<|> List.jsonApi
   :<|> List.csvApi
+  :<|> ShareURL.api
   -- :<|> waitAPI
......
@@ -66,7 +66,7 @@ class Collage sup inf where
 instance Collage Texte Paragraphe where
   dec (Texte t) = map Paragraphe $ DT.splitOn "\n" t
-  inc = Texte . DT.intercalate "\n" . map (\(Paragraphe t) -> t)
+  inc = Texte . DT.unlines . map (\(Paragraphe t) -> t)

 instance Collage Paragraphe Phrase where
   dec (Paragraphe t) = map Phrase $ sentences t
@@ -78,7 +78,7 @@ instance Collage Phrase MultiTerme where
 instance Collage MultiTerme Mot where
   dec (MultiTerme mt) = map Mot $ DT.words mt
-  inc = MultiTerme . DT.intercalate " " . map (\(Mot m) -> m)
+  inc = MultiTerme . DT.unwords . map (\(Mot m) -> m)

 -------------------------------------------------------------------
 -- Contexts of text
@@ -92,7 +92,7 @@ isCharStop :: Char -> Bool
 isCharStop c = c `elem` ['.','?','!']

 unsentences :: [Text] -> Text
-unsentences txts = DT.intercalate " " txts
+unsentences txts = DT.unwords txts

 -- | Ngrams size
 size :: Text -> Int
......
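One semantic point on these rewrites: DT.unwords is definitionally DT.intercalate " ", so the word-joining substitutions are behavior-preserving, but DT.unlines appends a trailing newline that DT.intercalate "\n" did not. A quick GHCi-style check:

    -- DT.intercalate "\n" ["a","b"] == "a\nb"
    -- DT.unlines          ["a","b"] == "a\nb\n"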
@@ -71,7 +71,7 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
       langText :: LangText -> Text
       langText (LangText _l t1) = t1
       langText (OnlyText t2   ) = t2
-      langText (ArrayText ts  ) = Text.intercalate " " $ map langText ts
+      langText (ArrayText ts  ) = Text.unwords $ map langText ts

   let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
   let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
......
@@ -247,7 +247,7 @@ text2titleParagraphs n = catMaybes
     n' = n + (round $ (fromIntegral n) / (2 :: Double))

     doTitle :: [Text] -> Maybe (Text, Text)
-    doTitle (t:ts) = Just (t, DT.intercalate " " ts)
+    doTitle (t:ts) = Just (t, DT.unwords ts)
     doTitle [] = Nothing
......
@@ -58,6 +58,11 @@ isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
 -}

+-- | Good value from users' requests and anthropological analysis
+goodMapListSize :: Int
+goodMapListSize = 350
+
 -- | TODO improve grouping functions of Authors, Sources, Institutes..
 buildNgramsLists :: ( HasNodeStory env err m
                     , HasNLPServer env
@@ -71,7 +76,7 @@ buildNgramsLists :: ( HasNodeStory env err m
   -> GroupParams
   -> m (Map NgramsType [NgramsElement])
 buildNgramsLists user uCid mCid mfslw gp = do
-  ngTerms     <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize 350)
+  ngTerms     <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize goodMapListSize)
   othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
                       [ (Authors , MapListSize 9, MaxListSize 1000)
                       , (Sources , MapListSize 9, MaxListSize 1000)
@@ -179,23 +184,24 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
                                    )
   -- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt

-  let !ngramsKeys = HashSet.fromList
-                  $ List.take mapListSize
-                  $ HashSet.toList
-                  $ HashMap.keysSet allTerms
+  let !allKeys = HashMap.keysSet allTerms

   -- printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys)

-  !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
+  !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) allKeys)

   let
-    !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
+    !socialLists_Stemmed = addScoreStem groupParams' allKeys socialLists
     !groupedWithList = toGroupedTree socialLists_Stemmed allTerms
     !(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
+                                 $ HashMap.fromList
+                                 $ List.take mapListSize
+                                 $ HashMap.toList
                                  $ HashMap.filter (\g -> view gts'_score g > 1)
                                  $ view flc_scores groupedWithList

+    -- | Split candidateTerms into mono-terms and multi-terms.
     !(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms

     -- void $ panicTrace $ "groupedWithList: " <> show groupedWithList
@@ -211,6 +217,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
     !monoSize = 0.4  :: Double
     !multSize = 1 - monoSize

+    -- | Splits given hashmap into 2 pieces, based on score
     splitAt' n' ns = both (HashMap.fromListWith (<>))
                    $ List.splitAt (round $ n' * listSizeGlobal)
                    $ List.sortOn (viewScore . snd)
@@ -254,8 +261,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
                          ]
       where
         mapStemNodeIds = HashMap.toList
-                       $ HashMap.map viewScores
-                       $ groupedTreeScores_SetNodeId
+                       $ HashMap.map viewScores groupedTreeScores_SetNodeId
   let
     -- computing scores
     mapScores f = HashMap.fromList
......
@@ -69,7 +69,7 @@ groupWith :: GroupParams
 groupWith GroupIdentity t = identity t
 groupWith (GroupParams { unGroupParams_lang = l }) t =
     NgramsTerm
-  $ Text.intercalate " "
+  $ Text.unwords
   $ map (stem l PorterAlgorithm)
   -- . take n
   $ List.sort
......
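The grouping key sorts a term's words and stems each one, so different surface forms of the same multi-term can collapse to a single key. A hedged illustration (exact stems depend on the Porter implementation; `params` stands in for a full GroupParams record):

    -- groupWith params (NgramsTerm "opening sources")
    --   == groupWith params (NgramsTerm "sources opening")
    --   == NgramsTerm "open sourc"   -- sorted, stemmed, rejoined by unwords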
@@ -14,14 +14,15 @@ TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93id
 -}

-module Gargantext.Core.Text.Metrics.TFICF ( TFICF
-                                           , TficfContext(..)
-                                           , Total(..)
-                                           , Count(..)
-                                           , tficf
-                                           , sortTficf
-                                           )
-  where
+module Gargantext.Core.Text.Metrics.TFICF
+  ( TFICF
+  , TficfContext(..)
+  , Total(..)
+  , Count(..)
+  , tficf
+  , sortTficf
+  )
+where

 import Data.List qualified as List
 import Data.Map.Strict (toList)
@@ -34,12 +35,19 @@ path = "[G.T.Metrics.TFICF]"

 type TFICF = Double

+-- https://www.researchgate.net/publication/221226686_TF-ICF_A_New_Term_Weighting_Scheme_for_Clustering_Dynamic_Data_Streams
+-- TficfSupra n m
+--  - m is the total number of documents in the corpus
+--  - n is the number of documents where the given term occurred more than once
+-- TficfInfra n m
+--  -
 data TficfContext n m = TficfInfra n m
                       | TficfSupra n m
   deriving (Show)

-data Total = Total {unTotal :: !Double}
-data Count = Count {unCount :: !Double}
+newtype Total = Total { unTotal :: Double }
+newtype Count = Count { unCount :: Double }

 tficf :: TficfContext Count Total
       -> TficfContext Count Total
@@ -50,7 +58,11 @@ tficf (TficfInfra (Count ic) (Total it) )
   | otherwise = panicTrace
               $ "[ERR]"
               <> path
-              <> " Frequency impossible"
+              <> " Frequency impossible: "
+              <> "ic = " <> show ic
+              <> ", it = " <> show it
+              <> ", sc = " <> show sc
+              <> ", st = " <> show st
 tficf _ _ = panicTrace $ "[ERR]" <> path <> "Undefined for these contexts"
......
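With the contexts documented above, a call pairs the local (infra) statistics with the global (supra) ones. A sketch with invented counts, matching the signature shown in this hunk (the weighting formula itself lives in the elided lines):

    exampleScore :: TFICF
    exampleScore = tficf (TficfInfra (Count 3)  (Total 100))    -- term in 3 of 100 local docs
                         (TficfSupra (Count 50) (Total 10000))  -- term in 50 of 10000 global docs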
@@ -92,7 +92,8 @@ instance Hashable Ngrams

 makeLenses ''Ngrams
 instance PGS.ToRow Ngrams where
-  toRow (UnsafeNgrams t s) = [toField t, toField s]
+  toRow (UnsafeNgrams { .. }) = [ toField _ngramsTerms
+                                , toField _ngramsSize ]

 ------------------------------------------------------------------------
 -------------------------------------------------------------------------
......
@@ -50,7 +50,7 @@ data Paragraph = Uniform Grain | AuthorLike
 -- Grain: number of Sentences by block of Text
 -- Step : overlap of sentence between connex block of Text
 groupUniform :: Grain -> [Text] -> [Text]
-groupUniform g ts = map (Text.intercalate " ")
+groupUniform g ts = map Text.unwords
                   $ chunkAlong g g
                   $ sentences
                   $ Text.concat ts
......
@@ -52,7 +52,7 @@ import Gargantext.Core.Text.Terms.Mono (monoTerms)
 import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
 import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
 import Gargantext.Core.Text.Terms.Multi (multiterms)
-import Gargantext.Core.Types ( TermsCount, POS, Terms(Terms), TermsWithCount )
+import Gargantext.Core.Types ( TermsCount, POS, Terms(..), TermsWithCount )
 import Gargantext.Core.Utils (groupWithCounts)
 import Gargantext.Database.Prelude (DBCmd)
 import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
@@ -60,6 +60,7 @@ import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgr
 import Gargantext.Database.Schema.Ngrams (text2ngrams, NgramsId)
 import Gargantext.Prelude

+
 data TermType lang
   = Mono  { _tt_lang :: !lang }
   | Multi { _tt_lang :: !lang }
@@ -86,7 +87,7 @@ extractTerms ncs (Unsupervised {..}) xs = mapM (terms ncs (Unsupervised { _tt_mo
   where
     m' = case _tt_model of
       Just m''-> m''
-      Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
+      Nothing -> newTries _tt_windowSize (Text.unwords xs)

 extractTerms ncs termTypeLang xs = mapM (terms ncs termTypeLang) xs
@@ -124,15 +125,15 @@ class ExtractNgramsT h
                   -> DBCmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
 ------------------------------------------------------------------------
 enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
-enrichedTerms l pa po (Terms ng1 ng2) =
+enrichedTerms l pa po (Terms { .. }) =
   NgramsPostag { _np_lang   = l
                , _np_algo   = pa
                , _np_postag = po
                , _np_form   = form
                , _np_lem    = lem }
   where
-    form = text2ngrams $ Text.intercalate " " ng1
-    lem  = text2ngrams $ Text.intercalate " " $ Set.toList ng2
+    form = text2ngrams $ Text.unwords _terms_label
+    lem  = text2ngrams $ Text.unwords $ Set.toList _terms_stem
 ------------------------------------------------------------------------
 cleanNgrams :: Int -> Ngrams -> Ngrams
......
@@ -114,7 +114,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms KeepAll pat
 --------------------------------------------------------------------------

 addSpaces :: Text -> Text
-addSpaces = (Text.intercalate " ") . (Text.chunksOf 1)
+addSpaces = Text.unwords . (Text.chunksOf 1)

 --------------------------------------------------------------------------
......
@@ -68,7 +68,7 @@ data Terms = Terms { _terms_label :: Label
                    , _terms_stem  :: Stems
                    } deriving (Ord, Show)
 instance Eq Terms where
-  (==) (Terms _ s1) (Terms _ s2) = s1 == s2
+  (==) (Terms { _terms_stem = s1 }) (Terms { _terms_stem = s2 }) = s1 == s2

 type TermsCount = Int
......
@@ -151,6 +151,6 @@ instance ToHyperdataRow HyperdataContact where
   toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
     HyperdataRowContact (fromMaybe "FirstName" fn) (fromMaybe "LastName" ln) ou'
     where
-      ou' = maybe "CNRS" (Text.intercalate " " . _cw_organization) (head ou)
+      ou' = maybe "CNRS" (Text.unwords . _cw_organization) (head ou)
   toHyperdataRow (HyperdataContact {}) =
     HyperdataRowContact "FirstName" "LastName" "Labs"
@@ -118,14 +118,14 @@ cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold stren
   partitions <- if (Map.size distanceMap > 0)
       then recursiveClustering' (spinglass' 1) distanceMap
-      else panic $ Text.intercalate " " [ "I can not compute the graph you request"
-                                        , "because either the quantity of documents"
-                                        , "or the quantity of terms"
-                                        , "are lacking."
-                                        , "Solution: add more either Documents or Map Terms to your analysis."
-                                        , "Follow the available tutorials on the Training EcoSystems."
-                                        , "Ask your co-users of GarganText how to have access to it."
-                                        ]
+      else panic $ Text.unwords [ "I can not compute the graph you request"
+                                , "because either the quantity of documents"
+                                , "or the quantity of terms"
+                                , "are lacking."
+                                , "Solution: add more either Documents or Map Terms to your analysis."
+                                , "Follow the available tutorials on the Training EcoSystems."
+                                , "Ask your co-users of GarganText how to have access to it."
+                                ]
   length partitions `seq` pure ()
   let
......
@@ -185,12 +185,13 @@ selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt tms =
                 ( int
                 , toDBid NodeDocument
                 , cId
-                , Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
+                -- , Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
+                , DPS.In (unNgramsTerm <$> (List.take 10000 tms))
                 , cId
                 , toDBid nt
                 )
-  where
-    fields = [QualifiedIdentifier Nothing "text"]
+  -- where
+  --   fields = [QualifiedIdentifier Nothing "text"]
 queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
 queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
@@ -198,18 +199,42 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
       JOIN nodes_contexts nn ON n.id = nn.context_id
       WHERE n.typename = ?
       AND nn.node_id = ?),
-      input_rows(terms) AS (?)
-      SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-      JOIN ngrams ng ON cng.ngrams_id = ng.id
-      JOIN input_rows ir ON ir.terms = ng.terms
+      input_rows AS (
+        SELECT id, terms
+        FROM ngrams
+        WHERE terms IN ?
+      )
+      SELECT ir.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
+      JOIN input_rows ir ON cng.ngrams_id = ir.id
       JOIN nodes_contexts nn ON nn.context_id = cng.context_id
      JOIN nodes_sample n ON nn.context_id = n.id
      WHERE nn.node_id = ?      -- CorpusId
      AND cng.ngrams_type = ?   -- NgramsTypeId
      AND nn.category > 0
-     GROUP BY cng.node_id, ng.terms
+     GROUP BY cng.node_id, ir.terms
  |]
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
-- JOIN nodes_contexts nc ON c.id = nc.context_id
-- WHERE c.typename = ?
-- AND nc.node_id = ?),
-- input_rows(terms) AS (?)
-- SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-- JOIN ngrams ng ON cng.ngrams_id = ng.id
-- JOIN input_rows ir ON ir.terms = ng.terms
-- JOIN nodes_contexts nc ON nc.context_id = cng.context_id
-- JOIN nodes_sample ns ON nc.context_id = ns.id
-- WHERE nc.node_id = ? -- CorpusId
-- AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nc.category > 0
-- -- AND nc.context_id IN (SELECT id FROM nodes_sample)
-- GROUP BY cng.node_id, ng.terms
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType
=> CorpusId => CorpusId
-> Int -> Int
......
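On the parameter change above: postgresql-simple's In wrapper substitutes a list as a parenthesized SQL list, which is exactly what the new WHERE terms IN ? expects, whereas the old Values-based CTE parameter needed the explicit "text" column identifier. A sketch with invented terms:

    -- DPS.In ["brain", "imaging"]  ~>  ... WHERE terms IN ('brain', 'imaging')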
@@ -70,6 +70,9 @@ getTficf_withSample cId mId nt = do
                    <$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
                        (HM.keys mapTextDoubleLocal)

+  printDebug "[getTficf_withSample] mapTextDoubleLocal: " mapTextDoubleLocal
+  printDebug "[getTficf_withSample] mapTextDoubleGlobal: " mapTextDoubleGlobal
+
   --printDebug "getTficf_withSample" (mapTextDoubleLocal, mapTextDoubleGlobal, countLocal, countGlobal)
   pure $ HM.mapWithKey (\t n ->
            tficf (TficfInfra (Count n )
......
@@ -207,9 +207,9 @@ fromField' field mb = do
     valueToHyperdata v = case fromJSON v of
       Success a  -> pure a
       Error _err -> returnError ConversionFailed field
-                  $ DL.intercalate " " [ "cannot parse hyperdata for JSON: "
-                                       , show v
-                                       ]
+                  $ DL.unwords [ "cannot parse hyperdata for JSON: "
+                               , show v
+                               ]

 printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
 printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
......
@@ -28,7 +28,7 @@ import Data.HashMap.Strict qualified as HashMap
 import Data.List qualified as List
 import Data.Map.Strict qualified as Map
 import Database.PostgreSQL.Simple qualified as PGS
-import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
+import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType)
 import Gargantext.Database.Admin.Types.Node ( pgNodeId, CorpusId, ListId, DocId )
 import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd)
 import Gargantext.Database.Query.Join (leftJoin3)
@@ -79,14 +79,15 @@ insertNgrams ns =
 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
 insertNgrams' :: [Ngrams] -> DBCmd err [Indexed Int Text]
-insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
+insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns')
   where
-    fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
+    ns' = (\n -> (_ngramsTerms n, _ngramsSize n)) <$> ns
+    fields = map (QualifiedIdentifier Nothing) ["text", "int4"]

 _insertNgrams_Debug :: [(Text, Size)] -> DBCmd err ByteString
 _insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
   where
-    fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
+    fields = map (QualifiedIdentifier Nothing) ["text", "int4"]

 ----------------------
 queryInsertNgrams :: PGS.Query
......
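The new ns' projects each Ngrams record to a plain (terms, size) pair so that Values can render rows against the declared "text"/"int4" columns. A worked example with an invented record:

    -- ns  = [UnsafeNgrams { _ngramsTerms = "open source", _ngramsSize = 2 }]
    -- ns' = [("open source", 2)]   -- rendered as VALUES ('open source' :: "text", 2 :: "int4")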
@@ -53,14 +53,14 @@ type NgramsPostagInsert = ( Int
                           )

 toInsert :: NgramsPostag -> NgramsPostagInsert
-toInsert (NgramsPostag l a p form lem) =
-  ( toDBid l
-  , toDBid a
-  , show p
-  , view ngramsTerms form
-  , view ngramsSize  form
-  , view ngramsTerms lem
-  , view ngramsSize  lem
-  )
+toInsert (NgramsPostag { .. }) =
+  ( toDBid _np_lang
+  , toDBid _np_algo
+  , show _np_postag
+  , view ngramsTerms _np_form
+  , view ngramsSize  _np_form
+  , view ngramsTerms _np_lem
+  , view ngramsSize  _np_lem
+  )

 insertNgramsPostag :: [NgramsPostag] -> DBCmd err (HashMap Text NgramsId)
@@ -154,17 +154,18 @@ SELECT terms,id FROM ins_form_ret
 -- TODO add lang and postag algo
 -- TODO remove when form == lem in insert
 selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
-selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.In (map _ngramsTerms ns), toDBid l, toDBid server)
+selectLems l (NLPServerConfig { server }) ns =
+  runPGSQuery querySelectLems (PGS.In (_ngramsTerms <$> ns), toDBid l, toDBid server)
 ----------------------
 querySelectLems :: PGS.Query
 querySelectLems = [sql|
   WITH
   trms
-  AS (SELECT id, terms, n
+  AS (SELECT id, terms
       FROM ngrams
       WHERE terms IN ?)
-  , input_rows(lang_id, algo_id, terms,n)
-  AS (SELECT ? as lang_id, ? as algo_id, terms, n, id
+  , input_rows
+  AS (SELECT ? as lang_id, ? as algo_id, terms, id
       FROM trms)
   , lems AS ( select ir.terms as t1, n2.terms as t2, sum(np.score) as score from input_rows ir
               JOIN ngrams_postag np ON np.ngrams_id = ir.id
@@ -179,29 +180,29 @@ querySelectLems = [sql|
 |]

 -- | This is the same as 'selectLems', but slower.
-selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
-selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
-  where
-    fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
-    datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
-
-querySelectLems' :: PGS.Query
-querySelectLems' = [sql|
-  WITH input_rows(lang_id, algo_id, terms,n)
-  AS (?) -- ((VALUES ('automata' :: "text")))
-  , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
-              JOIN ngrams n1 ON ir.terms = n1.terms
-              JOIN ngrams_postag np ON np.ngrams_id = n1.id
-              JOIN ngrams n2 ON n2.id = np.lemm_id
-              WHERE np.lang_id = ir.lang_id
-              AND np.algo_id = ir.algo_id
-              GROUP BY n1.terms, n2.terms
-              ORDER BY score DESC
-            )
-  SELECT t1,t2 from lems
-|]
+-- selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
+-- selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
+--   where
+--     fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
+--     datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns

+-- querySelectLems' :: PGS.Query
+-- querySelectLems' = [sql|
+-- WITH input_rows(lang_id, algo_id, terms,n)
+-- AS (?) -- ((VALUES ('automata' :: "text")))
+-- , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
+--             JOIN ngrams n1 ON ir.terms = n1.terms
+--             JOIN ngrams_postag np ON np.ngrams_id = n1.id
+--             JOIN ngrams n2 ON n2.id = np.lemm_id
+--             WHERE np.lang_id = ir.lang_id
+--             AND np.algo_id = ir.algo_id
+--             GROUP BY n1.terms, n2.terms
+--             ORDER BY score DESC
+--           )
+-- SELECT t1,t2 from lems
+-- |]

 -- | Insert Table
 createTable_NgramsPostag :: DBCmd err [Int]
......
@@ -173,15 +173,26 @@ getChildrenByType :: HasDBid NodeType
                   -> NodeType
                   -> DBCmd err [NodeId]
 getChildrenByType nId nType = do
-  result <- runPGSQuery query (PGS.Only nId)
-  children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
-  pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
+  childrenFirstLevel <- getClosestChildrenByType nId nType
+  childrenLst <- mapM (\id -> getChildrenByType id nType) childrenFirstLevel
+  pure $ childrenFirstLevel ++ concat childrenLst
+
+-- | Given a node id, find all its children (only first level) of
+-- given node type.
+getClosestChildrenByType :: HasDBid NodeType
+                         => NodeId
+                         -> NodeType
+                         -> DBCmd err [NodeId]
+getClosestChildrenByType nId nType = do
+  results <- runPGSQuery query (nId, toDBid nType)
+  pure $ (\(PGS.Only nodeId) -> nodeId) <$> results
   where
     query :: PGS.Query
     query = [sql|
-      SELECT n.id, n.typename
+      SELECT n.id
       FROM nodes n
-      WHERE n.parent_id = ?;
+      WHERE n.parent_id = ?
+        AND n.typename = ?;
    |]
 ------------------------------------------------------------------------
......
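The rewrite moves the type filter into SQL: one query per tree level via getClosestChildrenByType, with the recursion kept in Haskell. A usage sketch (the node id is invented):

    -- All corpus nodes anywhere below node 42, level by level:
    -- allCorpora <- getChildrenByType (UnsafeMkNodeId 42) NodeCorpus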
@@ -317,6 +317,15 @@ nodeContextsScore inputData = map (\(PGS.Only a) -> a)
 ------------------------------------------------------------------------

+-- | Counts the number of documents in a corpus.
+-- Also applies filter for category to be at least 1 (i.e. not in trash).
+-- select count(*)
+--   from contexts c
+--   join nodes_contexts nc on c.id = nc.context_id
+--   where
+--     nc.node_id = 88
+--     and nc.category >= 1
+--     and c.typename = 4
 selectCountDocs :: HasDBid NodeType => CorpusId -> DBCmd err Int
 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
   where
......
@@ -39,6 +39,10 @@ import Gargantext.Prelude
 type NgramsId = Int
 type Size     = Int

+-- | Ngrams table
+-- 'n' is the size, see G.D.Q.T.Ngrams -> insertNgrams'
+-- function. I.e. ngrams with 1 term are of size 1, ngrams with 2
+-- terms are of size 2 etc.
 data NgramsPoly id terms n = NgramsDB { _ngrams_id    :: !id
                                       , _ngrams_terms :: !terms
                                       , _ngrams_n     :: !n
@@ -90,7 +94,8 @@ instance PGS.ToRow Text where
   toRow t = [toField t]

 text2ngrams :: Text -> Ngrams
-text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
+text2ngrams txt = UnsafeNgrams { _ngramsTerms = txt'
+                               , _ngramsSize = length $ splitOn " " txt' }
   where
     txt' = strip txt
......
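The record syntax makes the size computation explicit; a worked example (pure, easy to check):

    -- text2ngrams "  open source  "
    --   == UnsafeNgrams { _ngramsTerms = "open source", _ngramsSize = 2 }
    -- (strip first, then count the space-separated chunks)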
@@ -21,7 +21,9 @@ import Gargantext.Database.Schema.Prelude
 import Gargantext.Prelude

--- | Index memory of any type in Gargantext
+-- | Index memory of any type in Gargantext.
+-- I.e. given entity 'a', we use this type to mark that it has a DB id of type 'i'.
+-- An un-indexed entity 'a' might not have been INSERT-ed yet to the DB.
 data Indexed i a =
   Indexed { _index   :: !i
           , _unIndex :: !a
......