Merge branch 'testing' into stable

6b0ddc61 · Alexandre Delanoë · bc86389a · f56e8fc8 · 6b0ddc61 · 6b0ddc61
Commit 6b0ddc61 authored May 07, 2024 by Alexandre Delanoë
30 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -5,7 +5,7 @@ cabal-version: 3.4
 -- see: https://github.com/sol/hpack

 name:           gargantext
-version: 0.0.7
+version: 0.0.7.1.3
 synopsis:       Search, map, share
 description:    Please see README.md
 category:       Data
@@ -133,6 +133,7 @@ library
      Gargantext.API.Node.Corpus.Update
      Gargantext.API.Node.File
      Gargantext.API.Node.Share
+      Gargantext.API.Node.ShareURL
      Gargantext.API.Node.Update
      Gargantext.API.Prelude
      Gargantext.API.Routes

--- a/src/Gargantext/API/GraphQL.hs
+++ b/src/Gargantext/API/GraphQL.hs
@@ -70,6 +70,7 @@ data Query m
    , languages           :: m [GQLNLP.LanguageTuple]
    , nodes               :: GQLNode.NodeArgs -> m [GQLNode.Node]
    , nodes_corpus        :: GQLNode.CorpusArgs -> m [GQLNode.Corpus]
+    , node_children       :: GQLNode.NodeChildrenArgs -> m [GQLNode.Node]
    , node_parent         :: GQLNode.NodeParentArgs -> m [GQLNode.Node]
    , user_infos          :: GQLUserInfo.UserInfoArgs -> m [GQLUserInfo.UserInfo]
    , users               :: GQLUser.UserArgs -> m [GQLUser.User m]
@@ -121,6 +122,7 @@ rootResolver authenticatedUser policyManager =
                            , languages           = GQLNLP.resolveLanguages
                            , nodes               = GQLNode.resolveNodes authenticatedUser policyManager
                            , nodes_corpus        = GQLNode.resolveNodesCorpus
+                            , node_children       = GQLNode.resolveNodeChildren
                            , node_parent         = GQLNode.resolveNodeParent
                            , user_infos          = GQLUserInfo.resolveUserInfos authenticatedUser policyManager
                            , users               = GQLUser.resolveUsers authenticatedUser policyManager

--- a/src/Gargantext/API/GraphQL/Node.hs
+++ b/src/Gargantext/API/GraphQL/Node.hs
@@ -14,23 +14,21 @@ Portability : POSIX

 module Gargantext.API.GraphQL.Node where

-import Data.Aeson
+import Data.Aeson ( Result(..), Value(..) )
 import Data.Aeson.KeyMap qualified as KM
 import Data.Morpheus.Types ( GQLType )
-import Data.Text qualified as T
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck ( nodeChecks, AccessPolicyManager )
 import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
-import Gargantext.API.GraphQL.Types
-import Gargantext.Core
+import Gargantext.API.GraphQL.Types ( GqlM )
+import Gargantext.Core ( HasDBid(lookupDBid) )
 import Gargantext.Database.Admin.Types.Node (NodeType)
 import Gargantext.Database.Admin.Types.Node qualified as NN
 import Gargantext.Database.Prelude (CmdCommon)  -- , JSONB)
-import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNode)
+import Gargantext.Database.Query.Table.Node (getClosestChildrenByType, getClosestParentIdByType, getNode)
 import Gargantext.Database.Schema.Node qualified as N
 import Gargantext.Prelude
 import PUBMED.Types qualified as PUBMED
-import Prelude qualified

 data Corpus = Corpus
  { id           :: Int
@@ -89,7 +87,13 @@ dbNodesCorpus corpus_id = do
 data NodeParentArgs
  = NodeParentArgs
    { node_id     :: Int
-    , parent_type :: Text
+    , parent_type :: NodeType
+    } deriving (Generic, GQLType)
+
+data NodeChildrenArgs
+  = NodeChildrenArgs
+    { node_id    :: Int
+    , child_type :: NodeType
    } deriving (Generic, GQLType)

 resolveNodeParent
@@ -97,16 +101,21 @@ resolveNodeParent
  => NodeParentArgs -> GqlM e env [Node]
 resolveNodeParent NodeParentArgs { node_id, parent_type } = dbParentNodes node_id parent_type

+resolveNodeChildren
+  :: (CmdCommon env)
+  => NodeChildrenArgs -> GqlM e env [Node]
+resolveNodeChildren NodeChildrenArgs { node_id, child_type } = dbChildNodes node_id child_type
+
 dbParentNodes
  :: (CmdCommon env)
-  => Int -> Text -> GqlM e env [Node]
-dbParentNodes node_id parent_type = do
-  let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
-  case mParentType of
-    Left err -> do
-      lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
-      pure []
-    Right parentType -> do
+  => Int -> NodeType -> GqlM e env [Node]
+dbParentNodes node_id parentType = do
+  -- let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
+  -- case mParentType of
+  --   Left err -> do
+  --     lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
+  --     pure []
+  --   Right parentType -> do
      mNodeId <- lift $ getClosestParentIdByType (NN.UnsafeMkNodeId node_id) parentType -- (fromNodeTypeId parent_type_id)
      case mNodeId of
        Nothing -> pure []
@@ -114,6 +123,13 @@ dbParentNodes node_id parent_type = do
          node <- lift $ getNode id
          pure [toNode node]

+dbChildNodes :: (CmdCommon env)
+             => Int -> NodeType -> GqlM e env [Node]
+dbChildNodes node_id childType = do
+  childIds <- lift $ getClosestChildrenByType (NN.UnsafeMkNodeId node_id) childType -- (fromNodeTypeId parent_type_id)
+  children <- lift $ mapM getNode childIds
+  pure $ toNode <$> children
+
 toNode :: NN.Node json -> Node
 toNode N.Node { .. } = Node { id = nid
                            , name = _node_name

--- a/src/Gargantext/API/GraphQL/PolicyCheck.hs
+++ b/src/Gargantext/API/GraphQL/PolicyCheck.hs
@@ -3,11 +3,11 @@ module Gargantext.API.GraphQL.PolicyCheck where

 import Prelude

-import Control.Monad.Except
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
-import Gargantext.API.Errors.Types
-import Gargantext.API.GraphQL.Types
+import Control.Monad.Except (MonadError(..), MonadTrans(..))
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck ( BoolExpr, AccessCheck, AccessPolicyManager(..), AccessResult(..))
+import Gargantext.API.Errors.Types ( BackendInternalError(..) )
+import Gargantext.API.GraphQL.Types (GqlM)
 import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)

 withPolicy :: (HasConnectionPool env, HasConfig env)

--- a/src/Gargantext/API/GraphQL/TreeFirstLevel.hs
+++ b/src/Gargantext/API/GraphQL/TreeFirstLevel.hs
@@ -15,15 +15,15 @@ Portability : POSIX
 module Gargantext.API.GraphQL.TreeFirstLevel where

 import Data.Morpheus.Types (GQLType)
-import Gargantext.API.Admin.Auth.Types
-import Gargantext.API.Auth.PolicyCheck
-import Gargantext.API.GraphQL.PolicyCheck
-import Gargantext.API.GraphQL.Types
+import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
+import Gargantext.API.Auth.PolicyCheck (AccessPolicyManager, nodeChecks)
+import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
+import Gargantext.API.GraphQL.Types ( GqlM )
 import Gargantext.Core (fromDBid)
-import Gargantext.Core.Types (Tree, NodeTree, NodeType)
-import Gargantext.Core.Types.Main ( Tree(TreeN), _tn_node, _tn_children, NodeTree(NodeTree, _nt_id, _nt_type), _nt_name )
+-- import Gargantext.Core.Types (ContextId, CorpusId, ListId)
+import Gargantext.Core.Types.Main ( Tree(..), _tn_node, _tn_children, NodeTree(..), _nt_name )
 import Gargantext.Database.Admin.Config ()
-import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId (UnsafeMkNodeId))
+import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId(..), NodeType)
 import Gargantext.Database.Admin.Types.Node qualified as NN
 import Gargantext.Database.Prelude (CmdCommon)
 import Gargantext.Database.Query.Table.Node (getNode)
@@ -52,6 +52,7 @@ data TreeFirstLevel m = TreeFirstLevel
  , children :: [TreeNode]
  } deriving (Generic, GQLType)
  
+    
 data BreadcrumbArgs = BreadcrumbArgs
  {
    node_id :: Int
@@ -105,7 +106,8 @@ resolveParent Nothing = pure Nothing


 nodeToTreeNode :: HasCallStack => NN.Node json -> Maybe TreeNode
-nodeToTreeNode N.Node {..} = if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
+nodeToTreeNode N.Node {..} =
+ if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
   then
     Just TreeNode { id        = NN.unNodeId _node_id
                   , name      = _node_name
@@ -115,21 +117,22 @@ nodeToTreeNode N.Node {..} = if (fromDBid _node_typename /= NN.NodeFolderShared)
   else
     Nothing

-resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env (BreadcrumbInfo)
+resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env BreadcrumbInfo
 resolveBreadcrumb BreadcrumbArgs { node_id } = dbRecursiveParents node_id

 convertDbTreeToTreeNode :: HasCallStack => T.DbTreeNode -> TreeNode
-convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } = TreeNode
+convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } =
+  TreeNode
    { name = _dt_name
    , id = NN.unNodeId _dt_nodeId
    , node_type = fromDBid _dt_typeId
    , parent_id = NN.unNodeId <$> _dt_parentId
    }

-dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env (BreadcrumbInfo)
-dbRecursiveParents node_id = do
-  let nId = UnsafeMkNodeId node_id
+
+dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env BreadcrumbInfo
+dbRecursiveParents nodeId = do
+  let nId = UnsafeMkNodeId nodeId
  dbParents <- lift $ T.recursiveParents nId allNodeTypes
  let treeNodes = map convertDbTreeToTreeNode dbParents
-  let breadcrumbInfo = BreadcrumbInfo { parents = treeNodes }
-  pure breadcrumbInfo  
+  pure $ BreadcrumbInfo { parents = treeNodes }
--- a/src/Gargantext/API/Node/ShareURL.hs
+++ b/src/Gargantext/API/Node/ShareURL.hs
+{-# LANGUAGE TypeOperators        #-}
+{-# LANGUAGE ScopedTypeVariables  #-}
+
+module Gargantext.API.Node.ShareURL where
+
+import Data.Text
+import Gargantext.Prelude
+import Gargantext.API.Prelude
+import Servant
+import Gargantext.Core.Types (NodeType, NodeId, unNodeId)
+import Gargantext.Database.Prelude (HasConfig (hasConfig), CmdCommon)
+import Control.Lens.Getter (view)
+import Gargantext.Prelude.Config (gc_url)
+import Gargantext.API.Admin.EnvTypes (Env)
+import Gargantext.API.Errors (BackendInternalError)
+
+type API = Summary "Fetch URL for sharing a node"
+           :> QueryParam "type" NodeType
+           :> QueryParam "id" NodeId
+           :> Get '[JSON] Text
+
+api :: ServerT API (GargM Env BackendInternalError)
+api = getUrl
+
+getUrl :: (CmdCommon env) =>
+              Maybe NodeType -> Maybe NodeId -> GargM env BackendInternalError Text
+getUrl nt id = do
+-- TODO add check that the node is able to be shared (in a shared folder)
+    case nt of
+        Nothing -> pure "Invalid node Type"
+        Just t ->
+            case id of
+                Nothing -> pure "Invalid node ID"
+                Just i -> do
+                    url <- view $ hasConfig . gc_url
+                    pure $ url <> "/#/share/" <> show t <> "/" <> show (unNodeId i)
--- a/src/Gargantext/API/Routes.hs
+++ b/src/Gargantext/API/Routes.hs
@@ -43,6 +43,7 @@ import Gargantext.API.Node.Corpus.Export.Types qualified as CorpusExport
 import Gargantext.API.Node.Corpus.New qualified as New
 import Gargantext.API.Node.Document.Export qualified as DocumentExport
 import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
+import Gargantext.API.Node.ShareURL qualified as ShareURL
 import Gargantext.API.Prelude
 import Gargantext.API.Public qualified as Public
 import Gargantext.Core.Types.Individu (User(..))
@@ -222,6 +223,7 @@ type GargPrivateAPI' =
           :<|> List.GETAPI
           :<|> List.JSONAPI
           :<|> List.CSVAPI
+           :<|> "shareurl" :> ShareURL.API
 {-
           :<|> "wait"   :> Summary "Wait test"
                         :> Capture "x" Int
@@ -305,6 +307,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
     :<|> List.getApi
     :<|> List.jsonApi
     :<|> List.csvApi
+     :<|> ShareURL.api
 --     :<|> waitAPI



--- a/src/Gargantext/Core/Text.hs
+++ b/src/Gargantext/Core/Text.hs
@@ -66,7 +66,7 @@ class Collage sup inf where

 instance Collage Texte Paragraphe where
  dec (Texte t) = map Paragraphe $ DT.splitOn "\n" t
-  inc           = Texte . DT.intercalate "\n" . map (\(Paragraphe t) -> t)
+  inc           = Texte . DT.unlines . map (\(Paragraphe t) -> t)

 instance Collage Paragraphe Phrase where
  dec (Paragraphe t) = map Phrase $ sentences t
@@ -78,7 +78,7 @@ instance Collage Phrase MultiTerme where

 instance Collage MultiTerme Mot where
  dec (MultiTerme mt) = map Mot $ DT.words mt
-  inc                 = MultiTerme . DT.intercalate " " . map (\(Mot m) -> m)
+  inc                 = MultiTerme . DT.unwords . map (\(Mot m) -> m)

 -------------------------------------------------------------------
 -- Contexts of text
@@ -92,7 +92,7 @@ isCharStop :: Char -> Bool
 isCharStop c = c `elem` ['.','?','!']

 unsentences :: [Text] -> Text
-unsentences txts = DT.intercalate " " txts
+unsentences txts = DT.unwords txts

 -- | Ngrams size
 size :: Text -> Int

--- a/src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+++ b/src/Gargantext/Core/Text/Corpus/API/Isidore.hs
@@ -71,7 +71,7 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
    langText :: LangText -> Text
    langText (LangText _l t1) = t1
    langText (OnlyText t2   ) = t2
-    langText (ArrayText ts  ) = Text.intercalate " " $ map langText ts
+    langText (ArrayText ts  ) = Text.unwords $ map langText ts

  let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
  let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS

--- a/src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
+++ b/src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
@@ -247,7 +247,7 @@ text2titleParagraphs n = catMaybes
    n' = n + (round $ (fromIntegral n) / (2 :: Double))

 doTitle :: [Text] -> Maybe (Text, Text)
-doTitle (t:ts) = Just (t, DT.intercalate " " ts)
+doTitle (t:ts) = Just (t, DT.unwords ts)
 doTitle [] = Nothing



--- a/src/Gargantext/Core/Text/List.hs
+++ b/src/Gargantext/Core/Text/List.hs
@@ -58,6 +58,11 @@ isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
 -}


+-- | Good value from users' requests and anthropological analysis
+goodMapListSize :: Int
+goodMapListSize = 350
+    
+
 -- | TODO improve grouping functions of Authors, Sources, Institutes..
 buildNgramsLists :: ( HasNodeStory env err m
                    , HasNLPServer env
@@ -71,7 +76,7 @@ buildNgramsLists :: ( HasNodeStory env err m
                 -> GroupParams
                 -> m (Map NgramsType [NgramsElement])
 buildNgramsLists user uCid mCid mfslw gp = do
-  ngTerms     <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize 350)
+  ngTerms     <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize goodMapListSize)
  othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
                      [ (Authors   , MapListSize 9, MaxListSize 1000)
                      , (Sources   , MapListSize 9, MaxListSize 1000)
@@ -179,22 +184,23 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
                                    )
  -- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt

-  let !ngramsKeys = HashSet.fromList
-                  $ List.take mapListSize
-                  $ HashSet.toList
-                  $ HashMap.keysSet allTerms
+  let !allKeys = HashMap.keysSet allTerms

  -- printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys)

-  !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
+  !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) allKeys)

  let
-    !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
+    !socialLists_Stemmed = addScoreStem groupParams' allKeys socialLists
    !groupedWithList = toGroupedTree socialLists_Stemmed allTerms
    !(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
+                                 $ HashMap.fromList
+                                 $ List.take mapListSize
+                                 $ HashMap.toList
                                 $ HashMap.filter (\g -> view gts'_score g > 1)
                                 $ view flc_scores groupedWithList

+    -- | Split candidateTerms into mono-terms and multi-terms.
    !(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms

  -- void $ panicTrace $ "groupedWithList: " <> show groupedWithList
@@ -211,6 +217,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
    !monoSize = 0.4  :: Double
    !multSize = 1 - monoSize

+    -- | Splits given hashmap into 2 pieces, based on score
    splitAt' n' ns = both (HashMap.fromListWith (<>))
                      $ List.splitAt (round $ n' * listSizeGlobal)
                      $ List.sortOn (viewScore . snd)
@@ -254,8 +261,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
                           ]
          where
            mapStemNodeIds = HashMap.toList
-                           $ HashMap.map viewScores
-                           $ groupedTreeScores_SetNodeId
+                           $ HashMap.map viewScores groupedTreeScores_SetNodeId
  let
    -- computing scores
    mapScores f = HashMap.fromList

--- a/src/Gargantext/Core/Text/List/Group/WithStem.hs
+++ b/src/Gargantext/Core/Text/List/Group/WithStem.hs
@@ -69,7 +69,7 @@ groupWith :: GroupParams
 groupWith GroupIdentity  t = identity t
 groupWith (GroupParams { unGroupParams_lang = l }) t =
                    NgramsTerm
-                  $ Text.intercalate " "
+                  $ Text.unwords
                  $ map (stem l PorterAlgorithm)
                  -- . take n
                  $ List.sort

--- a/src/Gargantext/Core/Text/Metrics/TFICF.hs
+++ b/src/Gargantext/Core/Text/Metrics/TFICF.hs
@@ -14,14 +14,15 @@ TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93id
 -}


-module Gargantext.Core.Text.Metrics.TFICF ( TFICF
+module Gargantext.Core.Text.Metrics.TFICF
+ ( TFICF
 , TficfContext(..)
 , Total(..)
 , Count(..)
 , tficf
 , sortTficf
 )
-  where
+where

 import Data.List qualified as List
 import Data.Map.Strict (toList)
@@ -34,12 +35,19 @@ path = "[G.T.Metrics.TFICF]"

 type TFICF = Double

+-- https://www.researchgate.net/publication/221226686_TF-ICF_A_New_Term_Weighting_Scheme_for_Clustering_Dynamic_Data_Streams
+-- TficfSupra n m
+--  - m is the total number of documents in the corpus
+--  - n is the number of documents, where given term occured more than once
+-- TficfInfra n m
+--  - 
+    
 data TficfContext n m = TficfInfra n m
                      | TficfSupra n m
  deriving (Show)

-data Total = Total {unTotal :: !Double}
-data Count = Count {unCount :: !Double}
+newtype Total = Total { unTotal :: Double }
+newtype Count = Count { unCount :: Double }

 tficf :: TficfContext Count Total
      -> TficfContext Count Total
@@ -50,7 +58,11 @@ tficf (TficfInfra (Count ic) (Total it) )
            | otherwise                        = panicTrace
                                               $ "[ERR]"
                                               <> path
-                                               <> " Frequency impossible"
+                                               <> " Frequency impossible: "
+                                               <> "ic = " <> show ic
+                                               <> ", it = " <> show it
+                                               <> ", sc = " <> show sc
+                                               <> ", st = " <> show st
 tficf _ _ = panicTrace $ "[ERR]" <> path <> "Undefined for these contexts"



--- a/src/Gargantext/Core/Text/Ngrams.hs
+++ b/src/Gargantext/Core/Text/Ngrams.hs
@@ -92,7 +92,8 @@ instance Hashable Ngrams

 makeLenses ''Ngrams
 instance PGS.ToRow Ngrams where
-  toRow (UnsafeNgrams t s) = [toField t, toField s]
+  toRow (UnsafeNgrams { .. }) = [ toField _ngramsTerms
+                                , toField _ngramsSize ]

 ------------------------------------------------------------------------
 -------------------------------------------------------------------------

--- a/src/Gargantext/Core/Text/Prepare.hs
+++ b/src/Gargantext/Core/Text/Prepare.hs
@@ -50,7 +50,7 @@ data Paragraph = Uniform Grain | AuthorLike
 -- Grain: number of Sentences by block of Text
 -- Step : overlap of sentence between connex block of Text
 groupUniform :: Grain -> [Text] -> [Text]
-groupUniform g ts = map (Text.intercalate " ")
+groupUniform g ts = map Text.unwords
                  $ chunkAlong g g
                  $ sentences
                  $ Text.concat ts

--- a/src/Gargantext/Core/Text/Terms.hs
+++ b/src/Gargantext/Core/Text/Terms.hs
@@ -52,7 +52,7 @@ import Gargantext.Core.Text.Terms.Mono  (monoTerms)
 import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
 import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
 import Gargantext.Core.Text.Terms.Multi (multiterms)
-import Gargantext.Core.Types ( TermsCount, POS, Terms(Terms), TermsWithCount )
+import Gargantext.Core.Types ( TermsCount, POS, Terms(..), TermsWithCount )
 import Gargantext.Core.Utils (groupWithCounts)
 import Gargantext.Database.Prelude (DBCmd)
 import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
@@ -60,6 +60,7 @@ import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgr
 import Gargantext.Database.Schema.Ngrams (text2ngrams, NgramsId)
 import Gargantext.Prelude

+
 data TermType lang
  = Mono      { _tt_lang :: !lang }
  | Multi     { _tt_lang :: !lang }
@@ -86,7 +87,7 @@ extractTerms ncs (Unsupervised {..}) xs = mapM (terms ncs (Unsupervised { _tt_mo
  where
    m' = case _tt_model of
      Just m''-> m''
-      Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
+      Nothing -> newTries _tt_windowSize (Text.unwords xs)
 extractTerms ncs termTypeLang xs = mapM (terms ncs termTypeLang) xs


@@ -124,15 +125,15 @@ class ExtractNgramsT h
                   -> DBCmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
 ------------------------------------------------------------------------
 enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
-enrichedTerms l pa po (Terms ng1 ng2) =
+enrichedTerms l pa po (Terms { .. }) =
  NgramsPostag { _np_lang = l
               , _np_algo = pa
               , _np_postag = po
               , _np_form = form
               , _np_lem = lem }
    where
-      form = text2ngrams $ Text.intercalate " " ng1
-      lem  = text2ngrams $ Text.intercalate " " $ Set.toList ng2
+      form = text2ngrams $ Text.unwords _terms_label
+      lem  = text2ngrams $ Text.unwords $ Set.toList _terms_stem

 ------------------------------------------------------------------------
 cleanNgrams :: Int -> Ngrams -> Ngrams

--- a/src/Gargantext/Core/Text/Terms/WithList.hs
+++ b/src/Gargantext/Core/Text/Terms/WithList.hs
@@ -114,7 +114,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms KeepAll pat

 --------------------------------------------------------------------------
 addSpaces :: Text -> Text
-addSpaces = (Text.intercalate " ") . (Text.chunksOf 1)
+addSpaces = Text.unwords . (Text.chunksOf 1)


 --------------------------------------------------------------------------

--- a/src/Gargantext/Core/Types.hs
+++ b/src/Gargantext/Core/Types.hs
@@ -68,7 +68,7 @@ data Terms = Terms { _terms_label :: Label
                   , _terms_stem  :: Stems
                   } deriving (Ord, Show)
 instance Eq Terms where
-  (==) (Terms _ s1) (Terms _ s2) = s1 == s2
+  (==) (Terms { _terms_stem = s1 }) (Terms { _terms_stem = s2 }) = s1 == s2

 type TermsCount = Int


--- a/src/Gargantext/Core/Types/Search.hs
+++ b/src/Gargantext/Core/Types/Search.hs
@@ -151,6 +151,6 @@ instance ToHyperdataRow HyperdataContact where
  toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
    HyperdataRowContact (fromMaybe "FirstName" fn) (fromMaybe "LastName" ln) ou'
      where
-        ou' = maybe "CNRS" (Text.intercalate " " . _cw_organization) (head ou)
+        ou' = maybe "CNRS" (Text.unwords . _cw_organization) (head ou)
  toHyperdataRow (HyperdataContact {}) =
    HyperdataRowContact "FirstName" "LastName" "Labs"
--- a/src/Gargantext/Core/Viz/Graph/Tools.hs
+++ b/src/Gargantext/Core/Viz/Graph/Tools.hs
@@ -118,7 +118,7 @@ cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold stren

  partitions <- if (Map.size distanceMap > 0)
      then recursiveClustering' (spinglass' 1) distanceMap
-      else panic $ Text.intercalate " " [ "I can not compute the graph you request"
+      else panic $ Text.unwords [ "I can not compute the graph you request"
                                , "because either the quantity of documents"
                                , "or the quantity of terms"
                                , "are lacking."

--- a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+++ b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
@@ -185,12 +185,13 @@ selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt tms =
                ( int
                , toDBid NodeDocument
                , cId
-                , Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
+                -- , Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
+                , DPS.In (unNgramsTerm <$> (List.take 10000 tms))
                , cId
                , toDBid nt
                )
-    where
-      fields = [QualifiedIdentifier Nothing "text"]
+    -- where
+    --   fields = [QualifiedIdentifier Nothing "text"]

 queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
 queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
@@ -198,18 +199,42 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
                          JOIN nodes_contexts nn ON n.id = nn.context_id
                            WHERE n.typename  = ?
                            AND nn.node_id = ?),
-       input_rows(terms) AS (?)
-  SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-    JOIN ngrams ng      ON cng.ngrams_id = ng.id
-    JOIN input_rows  ir ON ir.terms      = ng.terms
+       input_rows AS (
+           SELECT id, terms
+             FROM ngrams
+             WHERE terms IN ?
+        )
+  SELECT ir.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
+    JOIN input_rows  ir ON cng.ngrams_id = ir.id
    JOIN nodes_contexts nn ON nn.context_id   = cng.context_id
    JOIN nodes_sample n ON nn.context_id   = n.id
    WHERE nn.node_id      = ? -- CorpusId
      AND cng.ngrams_type = ? -- NgramsTypeId
      AND nn.category     > 0
-      GROUP BY cng.node_id, ng.terms
+      GROUP BY cng.node_id, ir.terms
  |]

+  
+-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
+-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
+--   WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
+--                           JOIN nodes_contexts nc ON c.id = nc.context_id
+--                             WHERE c.typename  = ?
+--                             AND nc.node_id = ?),
+--        input_rows(terms) AS (?)
+--   SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
+--     JOIN ngrams ng      ON cng.ngrams_id = ng.id
+--     JOIN input_rows  ir ON ir.terms      = ng.terms
+--     JOIN nodes_contexts nc ON nc.context_id   = cng.context_id
+--     JOIN nodes_sample ns ON nc.context_id   = ns.id
+--     WHERE nc.node_id      = ? -- CorpusId
+--       AND cng.ngrams_type = ? -- NgramsTypeId
+--       AND nc.category     > 0
+--       -- AND nc.context_id IN (SELECT id FROM nodes_sample)
+--       GROUP BY cng.node_id, ng.terms
+--   |]
+
+
 selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType
                                                     => CorpusId
                                                     -> Int

--- a/src/Gargantext/Database/Action/Metrics/TFICF.hs
+++ b/src/Gargantext/Database/Action/Metrics/TFICF.hs
@@ -70,6 +70,9 @@ getTficf_withSample cId mId nt = do
                     <$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
                            (HM.keys mapTextDoubleLocal)

+  printDebug "[getTficf_withSample] mapTextDoubleLocal: " mapTextDoubleLocal
+  printDebug "[getTficf_withSample] mapTextDoubleGlobal: " mapTextDoubleGlobal
+
  --printDebug "getTficf_withSample" (mapTextDoubleLocal, mapTextDoubleGlobal, countLocal, countGlobal)
  pure $ HM.mapWithKey (\t n ->
      tficf (TficfInfra (Count n                                               )

--- a/src/Gargantext/Database/Prelude.hs
+++ b/src/Gargantext/Database/Prelude.hs
@@ -207,7 +207,7 @@ fromField' field mb = do
          valueToHyperdata v = case fromJSON v of
             Success a  -> pure a
             Error _err -> returnError ConversionFailed field
-                         $ DL.intercalate " " [ "cannot parse hyperdata for JSON: "
+                         $ DL.unwords [ "cannot parse hyperdata for JSON: "
                                      , show v
                                      ]


--- a/src/Gargantext/Database/Query/Table/Ngrams.hs
+++ b/src/Gargantext/Database/Query/Table/Ngrams.hs
@@ -28,7 +28,7 @@ import Data.HashMap.Strict qualified as HashMap
 import Data.List qualified as List
 import Data.Map.Strict qualified as Map
 import Database.PostgreSQL.Simple qualified as PGS
-import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
+import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType)
 import Gargantext.Database.Admin.Types.Node ( pgNodeId, CorpusId, ListId, DocId )
 import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd)
 import Gargantext.Database.Query.Join (leftJoin3)
@@ -79,14 +79,15 @@ insertNgrams ns =

 -- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
 insertNgrams' :: [Ngrams] -> DBCmd err [Indexed Int Text]
-insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
+insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns')
  where
-    fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
+    ns' = (\n -> (_ngramsTerms n, _ngramsSize n)) <$> ns
+    fields = map (QualifiedIdentifier Nothing) ["text", "int4"]

 _insertNgrams_Debug :: [(Text, Size)] -> DBCmd err ByteString
 _insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
  where
-    fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
+    fields = map (QualifiedIdentifier Nothing) ["text", "int4"]

 ----------------------
 queryInsertNgrams :: PGS.Query

--- a/src/Gargantext/Database/Query/Table/NgramsPostag.hs
+++ b/src/Gargantext/Database/Query/Table/NgramsPostag.hs
@@ -53,14 +53,14 @@ type NgramsPostagInsert = ( Int
                          )

 toInsert :: NgramsPostag -> NgramsPostagInsert
-toInsert (NgramsPostag l a p form lem) =
-  ( toDBid l
-  , toDBid a
-  , show p
-  , view ngramsTerms form
-  , view ngramsSize  form
-  , view ngramsTerms lem
-  , view ngramsSize  lem
+toInsert (NgramsPostag { .. }) =
+  ( toDBid _np_lang
+  , toDBid _np_algo
+  , show _np_postag
+  , view ngramsTerms _np_form
+  , view ngramsSize  _np_form
+  , view ngramsTerms _np_lem
+  , view ngramsSize  _np_lem
  )

 insertNgramsPostag :: [NgramsPostag] -> DBCmd err (HashMap Text NgramsId)
@@ -154,17 +154,18 @@ SELECT terms,id FROM ins_form_ret
 -- TODO add lang and postag algo
 -- TODO remove when form == lem in insert
 selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
-selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.In (map _ngramsTerms ns), toDBid l, toDBid server)
+selectLems l (NLPServerConfig { server }) ns =
+  runPGSQuery querySelectLems (PGS.In (_ngramsTerms <$> ns), toDBid l, toDBid server)
 ----------------------
 querySelectLems :: PGS.Query
 querySelectLems = [sql|
  WITH
     trms
-       AS (SELECT id, terms, n
+       AS (SELECT id, terms
            FROM ngrams
            WHERE terms IN ?)
-   , input_rows(lang_id, algo_id, terms,n)
-    AS (SELECT ? as lang_id, ? as algo_id, terms, n, id
+   , input_rows
+    AS (SELECT ? as lang_id, ? as algo_id, terms, id
         FROM trms)
    , lems AS ( select ir.terms as t1, n2.terms as t2, sum(np.score) as score from input_rows ir
    JOIN ngrams_postag np ON np.ngrams_id = ir.id
@@ -179,29 +180,29 @@ querySelectLems = [sql|
  |]

 -- | This is the same as 'selectLems', but slower.
-selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
-selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
-  where
-    fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
-    datas  = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
-
-    
-querySelectLems' :: PGS.Query
-querySelectLems' = [sql|
-  WITH input_rows(lang_id, algo_id, terms,n)
-    AS (?) -- ((VALUES ('automata' :: "text")))
-    , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
-    JOIN ngrams        n1 ON ir.terms = n1.terms
-    JOIN ngrams_postag np ON np.ngrams_id = n1.id
-    JOIN ngrams        n2 ON n2.id    = np.lemm_id
-    WHERE np.lang_id = ir.lang_id
-      AND np.algo_id = ir.algo_id
-    GROUP BY n1.terms, n2.terms
-    ORDER BY score DESC
-  )
-
-  SELECT t1,t2 from lems
-  |]
+-- selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
+-- selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
+--   where
+--     fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
+--     datas  = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
+
+    
+-- querySelectLems' :: PGS.Query
+-- querySelectLems' = [sql|
+--   WITH input_rows(lang_id, algo_id, terms,n)
+--     AS (?) -- ((VALUES ('automata' :: "text")))
+--     , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
+--     JOIN ngrams        n1 ON ir.terms = n1.terms
+--     JOIN ngrams_postag np ON np.ngrams_id = n1.id
+--     JOIN ngrams        n2 ON n2.id    = np.lemm_id
+--     WHERE np.lang_id = ir.lang_id
+--       AND np.algo_id = ir.algo_id
+--     GROUP BY n1.terms, n2.terms
+--     ORDER BY score DESC
+--   )
+
+--   SELECT t1,t2 from lems
+--   |]

 -- | Insert Table
 createTable_NgramsPostag :: DBCmd err [Int]

--- a/src/Gargantext/Database/Query/Table/Node.hs
+++ b/src/Gargantext/Database/Query/Table/Node.hs
@@ -173,15 +173,26 @@ getChildrenByType :: HasDBid NodeType
                  -> NodeType
                  -> DBCmd err [NodeId]
 getChildrenByType nId nType = do
-  result <- runPGSQuery query (PGS.Only nId)
-  children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
-  pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
+  childrenFirstLevel <- getClosestChildrenByType nId nType
+  childrenLst <- mapM (\id -> getChildrenByType id nType) childrenFirstLevel
+  pure $ childrenFirstLevel ++ concat childrenLst
+
+-- | Given a node id, find all it's children (only first level) of
+-- given node type.
+getClosestChildrenByType :: HasDBid NodeType
+                         => NodeId
+                         -> NodeType
+                         -> DBCmd err [NodeId]
+getClosestChildrenByType nId nType = do
+  results <- runPGSQuery query (nId, toDBid nType)
+  pure $ (\(PGS.Only nodeId) -> nodeId) <$> results
  where
    query :: PGS.Query
    query = [sql|
-      SELECT n.id, n.typename
+      SELECT n.id
      FROM nodes n
-      WHERE n.parent_id = ?;
+      WHERE n.parent_id = ?
+      AND n.typename = ?;
    |]

 ------------------------------------------------------------------------

--- a/src/Gargantext/Database/Query/Table/NodeContext.hs
+++ b/src/Gargantext/Database/Query/Table/NodeContext.hs
@@ -317,6 +317,15 @@ nodeContextsScore inputData = map (\(PGS.Only a) -> a)


 ------------------------------------------------------------------------
+-- | Counts the number of documents in a corpus.
+--   Also applies filter for category to be at least 1 (i.e. not in trash).
+--     select count(*)
+--     from contexts c 
+--     join nodes_contexts nc on c.id = nc.context_id 
+--     where 
+--     nc.node_id = 88
+--     and nc.category >= 1
+--     and c.typename = 4
 selectCountDocs :: HasDBid NodeType => CorpusId -> DBCmd err Int
 selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
  where

--- a/src/Gargantext/Database/Schema/Ngrams.hs
+++ b/src/Gargantext/Database/Schema/Ngrams.hs
@@ -39,6 +39,10 @@ import Gargantext.Prelude
 type NgramsId  = Int
 type Size      = Int

+-- | Ngrams table
+-- 'n' is the size, see G.D.Q.T.Ngrams -> insertNgrams'
+-- function. I.e. ngrams with 1 term are of size 1, ngrams with 2
+-- terms are of size 2 etc.
 data NgramsPoly id terms n = NgramsDB { _ngrams_id    :: !id
                                      , _ngrams_terms :: !terms
                                      , _ngrams_n     :: !n
@@ -90,7 +94,8 @@ instance PGS.ToRow Text where
  toRow t = [toField t]

 text2ngrams :: Text -> Ngrams
-text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
+text2ngrams txt = UnsafeNgrams { _ngramsTerms = txt'
+                               , _ngramsSize = length $ splitOn " " txt' }
  where
    txt' = strip txt


--- a/src/Gargantext/Database/Types.hs
+++ b/src/Gargantext/Database/Types.hs
@@ -21,7 +21,9 @@ import Gargantext.Database.Schema.Prelude
 import Gargantext.Prelude


-- | Index memory of any type in Gargantext
+-- | Index memory of any type in Gargantext.
+-- I.e. given entity 'a', we use this type to mark that it has a DB id of type 'i'.
+-- An un-indexed entity 'a' might not have been INSERT-ed yet to the DB.
 data Indexed i a =
  Indexed { _index     :: !i
          , _unIndex   :: !a