[nodestory] some Flow rewrite

Split into Extract/Types/Utils modules. Found some functions that are unused.

[nodestory] some Flow rewrite
Split into Extract/Types/Utils modules. Found some functions that are unused.
ab68f83b · Przemyslaw Kaminski · 6b9588b1 · ab68f83b · ab68f83b · ab68f83b
Verified Commit ab68f83b authored Feb 15, 2024 by Przemyslaw Kaminski
8 changed files
--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -122,6 +122,7 @@ library
      Gargantext.Core.Methods.Similarities
      Gargantext.Core.NLP
      Gargantext.Core.NodeStory
+      Gargantext.Core.NodeStory.DB
      Gargantext.Core.NodeStory.Types
      Gargantext.Core.Text
      Gargantext.Core.Text.Context
@@ -341,6 +342,7 @@ library
      Gargantext.Database
      Gargantext.Database.Action.Delete
      Gargantext.Database.Action.Flow.Annuaire
+      Gargantext.Database.Action.Flow.Extract
      Gargantext.Database.Action.Flow.List
      Gargantext.Database.Action.Flow.Pairing
      Gargantext.Database.Action.Flow.Utils

--- a/src/Gargantext/Core/NodeStory.hs
+++ b/src/Gargantext/Core/NodeStory.hs
 {-|
 Module      : Gargantext.Core.NodeStory
-Description : Node API generation
+Description : NodeStory
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -50,10 +50,6 @@ module Gargantext.Core.NodeStory
  , getNodesArchiveHistory
  , Archive(..)
  , nodeExists
-  , runPGSQuery
-  , runPGSAdvisoryLock
-  , runPGSAdvisoryUnlock
-  , runPGSAdvisoryXactLock
  , getNodesIdWithType
  , fromDBNodeStoryEnv
  , upsertNodeStories
@@ -67,98 +63,23 @@ where

 import Control.Lens ((^.), (.~), (%~), non, _Just, at, view)
 import Control.Monad.Except
-import Data.HashMap.Strict (HashMap)
-import Data.HashMap.Strict qualified as HashMap
 import Data.Map.Strict qualified as Map
-import Data.Map.Strict.Patch qualified as PM
 import Data.Monoid
 import Data.Pool (Pool, withResource)
 import Data.Set qualified as Set
 import Database.PostgreSQL.Simple qualified as PGS
 import Database.PostgreSQL.Simple.SqlQQ (sql)
 import Database.PostgreSQL.Simple.ToField qualified as PGS
-import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
 import Gargantext.API.Ngrams.Types
-import Gargantext.Core (toDBid)
+import Gargantext.Core.NodeStory.DB
 import Gargantext.Core.NodeStory.Types
-import Gargantext.Core.Types (ListId, NodeId(..), NodeType)
+import Gargantext.Core.Types (ListId, NodeId(..))
 import Gargantext.Database.Admin.Config ()
 import Gargantext.Database.Prelude (HasConnectionPool(..))
 import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
-import Gargantext.Database.Schema.Ngrams (NgramsType)
 import Gargantext.Prelude hiding (to)
 import Gargantext.Prelude.Database

-- DB stuff
-
-nodeExists :: PGS.Connection -> NodeId -> IO Bool
-nodeExists c nId = (== [PGS.Only True])
-  <$> runPGSQuery c [sql| SELECT true FROM nodes WHERE id = ? LIMIT 1 |]
-                    (PGS.Only nId)
-
-getNodesIdWithType :: PGS.Connection -> NodeType -> IO [NodeId]
-getNodesIdWithType c nt = do
-  ns <- runPGSQuery c query (PGS.Only $ toDBid nt)
-  pure $ map (\(PGS.Only nId) -> UnsafeMkNodeId nId) ns
-  where
-    query :: PGS.Query
-    query = [sql| SELECT id FROM nodes WHERE typename = ? |]
-
-
-- /!\ This function is using an hard coded parameter
-- which depends on the Ngrams List Flow
-- Version > 5 is hard coded because by default
-- first version of history of manual change is 6
-getNodesArchiveHistory :: PGS.Connection
-                       -> [NodeId]
-                       -> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
-getNodesArchiveHistory c nodesId = do
-  as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
-                            :: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
-
-  pure $ map (\(nId, ngramsType, terms, patch)
-               -> ( UnsafeMkNodeId nId
-                  , Map.singleton ngramsType [HashMap.singleton terms patch]
-                  )
-             ) as
-  where
-
-    fields = [QualifiedIdentifier Nothing "int4"]
-    query :: PGS.Query
-    query = [sql| WITH nodes_id(nid) as (?)
-                    SELECT node_id, ngrams_type_id, terms, patch
-                    FROM node_story_archive_history
-                    JOIN ngrams ON ngrams.id = ngrams_id
-                    JOIN nodes_id n ON node_id = n.nid
-                    WHERE version > 5
-                    ORDER BY (version, node_story_archive_history.id) DESC
-            |]
-
-
-insertNodeArchiveHistory :: PGS.Connection -> NodeId -> Version -> [NgramsStatePatch'] -> IO ()
-insertNodeArchiveHistory _ _ _ [] = pure ()
-insertNodeArchiveHistory c nodeId version (h:hs) = do
-  let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
-                           (\(term, p) ->
-                              (nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
-  tuplesM <- mapM (\(nId, nType, term, patch) -> do
-                      [PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
-                      pure (nId, nType, ngramsId, term, patch)
-                  ) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
-  _ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
-  _ <- insertNodeArchiveHistory c nodeId version hs
-  pure ()
-  where
-    qInsert :: PGS.Query
-    qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
-                  ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
-                  RETURNING id|]
-
-    -- https://stackoverflow.com/questions/39224438/postgresql-insert-if-foreign-key-exists
-    query :: PGS.Query
-    query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch, version)
-                VALUES (?, ?, ?, ?, ?)
-                |]

 getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
 getNodeStory' c nId = do
@@ -196,14 +117,6 @@ getNodeStory c nId = do
  a <- getNodeStory' c nId
  pure $ NodeStory $ Map.singleton nId a

-
-nodeStoriesQuery :: PGS.Query
-nodeStoriesQuery = [sql| SELECT version, ngrams_type_id, terms, ngrams_repo_element
-                           FROM node_stories
-                           JOIN ngrams ON ngrams.id = ngrams_id
-                           WHERE node_id = ?
-                           |]
-
 -- |Functions to convert archive state (which is a `Map NgramsType
 --  (Map NgramsTerm NgramsRepoElement`)) to/from a flat list
 archiveStateToList :: NgramsState' -> ArchiveStateList
@@ -224,53 +137,6 @@ insertNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
 insertNodeStory c nId a = do
  insertArchiveStateList c nId (a ^. a_version) (archiveStateToList $ a ^. a_state)

-insertArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
-insertArchiveStateList c nodeId version as = do
-  mapM_ performInsert as
-  where
-    performInsert (ngramsType, ngrams, ngramsRepoElement) = do
-      [PGS.Only ngramsId] <- tryInsertTerms ngrams
-      _ <- case ngramsRepoElement ^. nre_root of
-        Nothing -> pure []
-        Just r -> tryInsertTerms r
-      mapM_ tryInsertTerms $ ngramsRepoElement ^. nre_children
-      runPGSExecute c query (nodeId, ngramsId, version, ngramsType, ngramsRepoElement)
-    
-    tryInsertTerms :: NgramsTerm -> IO [PGS.Only Int]
-    tryInsertTerms t = runPGSReturning c qInsert [PGS.Only t]
-    
-    qInsert :: PGS.Query
-    qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
-                  ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
-                  RETURNING id|]
-    
-    query :: PGS.Query
-    query = [sql|INSERT INTO node_stories(node_id, ngrams_id, version, ngrams_type_id, ngrams_repo_element)
-                VALUES (?, ?, ?, ?, ? :: jsonb)
-                |]
-
-deleteArchiveStateList :: PGS.Connection -> NodeId -> ArchiveStateList -> IO ()
-deleteArchiveStateList c nodeId as = do
-  mapM_ (\(nt, n, _) -> runPGSExecute c query (nodeId, nt, n)) as
-  where
-    query :: PGS.Query
-    query = [sql| DELETE FROM node_stories
-                WHERE node_id = ? AND ngrams_type_id = ?
-                  AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
-                  |]
-
-updateArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
-updateArchiveStateList c nodeId version as = do
-  let params = (\(nt, n, nre) -> (nre, version, nodeId, nt, n)) <$> as
-  mapM_ (runPGSExecute c query) params
-  where
-    query :: PGS.Query
-    query = [sql| UPDATE node_stories
-                SET ngrams_repo_element = ?, version = ?
-                WHERE node_id = ? AND ngrams_type_id = ?
-                  AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
-                  |]
-
 -- | This function updates the node story and archive for given node_id.
 updateNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> ArchiveList -> IO ()
 updateNodeStory c nodeId currentArchive newArchive = do
@@ -342,17 +208,6 @@ upsertNodeStories c nodeId newArchive = do

    -- printDebug "[upsertNodeStories] STOP nId" nId

-updateNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
-updateNodeStoryVersion c nodeId newArchive = do
-  let ngramsTypes = Map.keys $ newArchive ^. a_state
-  mapM_ (\nt -> runPGSExecute c query (newArchive ^. a_version, nodeId, nt)) ngramsTypes
-  where
-    query :: PGS.Query
-    query = [sql|UPDATE node_stories
-                SET version = ?
-                WHERE node_id = ?
-                AND ngrams_type_id = ?|]
-
 -- | Returns a `NodeListStory`, updating the given one for given `NodeId`
 nodeStoryInc :: PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory
 nodeStoryInc c ns@(NodeStory nls) nId = do

--- a/src/Gargantext/Core/NodeStory/DB.hs
+++ b/src/Gargantext/Core/NodeStory/DB.hs
+{-|
+Module      : Gargantext.Core.NodeStory.DB
+Description : NodeStory DB functions
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+-}
+
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE ConstraintKinds   #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE TemplateHaskell   #-}
+
+module Gargantext.Core.NodeStory.DB
+  ( nodeExists
+  , getNodesIdWithType
+  , getNodesArchiveHistory
+  , insertNodeArchiveHistory
+  , nodeStoriesQuery
+  , insertArchiveStateList
+  , deleteArchiveStateList
+  , updateArchiveStateList
+  , updateNodeStoryVersion )
+where
+
+import Control.Lens ((^.))
+import Control.Monad.Except
+import Data.HashMap.Strict (HashMap)
+import Data.HashMap.Strict qualified as HashMap
+import Data.Map.Strict qualified as Map
+import Data.Map.Strict.Patch qualified as PM
+import Data.Monoid
+import Database.PostgreSQL.Simple qualified as PGS
+import Database.PostgreSQL.Simple.SqlQQ (sql)
+import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
+import Gargantext.API.Ngrams.Types
+import Gargantext.Core (toDBid)
+import Gargantext.Core.NodeStory.Types
+import Gargantext.Core.Types (NodeId(..), NodeType)
+import Gargantext.Database.Admin.Config ()
+import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
+import Gargantext.Database.Schema.Ngrams (NgramsType)
+import Gargantext.Prelude hiding (to)
+import Gargantext.Prelude.Database
+
+
+-- | Check whether a row with the given id exists in the @nodes@ table.
+nodeExists :: PGS.Connection -> NodeId -> IO Bool
+nodeExists c nId = do
+  rows <- runPGSQuery c [sql| SELECT true FROM nodes WHERE id = ? LIMIT 1 |] (PGS.Only nId)
+  pure $ rows == [PGS.Only True]
+
+-- | Fetch the ids of all rows of @nodes@ whose @typename@ equals the
+-- DB id ('toDBid') of the given 'NodeType'.
+getNodesIdWithType :: PGS.Connection -> NodeType -> IO [NodeId]
+getNodesIdWithType c nt =
+  fmap unwrap <$> runPGSQuery c selectIds (PGS.Only (toDBid nt))
+  where
+    unwrap (PGS.Only nId) = UnsafeMkNodeId nId
+
+    selectIds :: PGS.Query
+    selectIds = [sql| SELECT id FROM nodes WHERE typename = ? |]
+
+
+-- | Fetch the archived ngrams patches of the given nodes.  Each returned
+-- row becomes one pair of node id and singleton map.
+--
+-- WARNING: this function uses a hard-coded parameter which depends on the
+-- Ngrams List Flow.  @version > 5@ is hard-coded because, by default, the
+-- first version of the history of manual changes is 6.
+getNodesArchiveHistory :: PGS.Connection
+                       -> [NodeId]
+                       -> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
+getNodesArchiveHistory c nodesId = do
+  rows <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
+            :: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
+  pure [ (UnsafeMkNodeId nId, Map.singleton ngramsType [HashMap.singleton terms patch])
+       | (nId, ngramsType, terms, patch) <- rows
+       ]
+  where
+    -- Column type of the VALUES list bound to the @nodes_id@ CTE.
+    fields = [QualifiedIdentifier Nothing "int4"]
+
+    query :: PGS.Query
+    query = [sql| WITH nodes_id(nid) as (?)
+                    SELECT node_id, ngrams_type_id, terms, patch
+                    FROM node_story_archive_history
+                    JOIN ngrams ON ngrams.id = ngrams_id
+                    JOIN nodes_id n ON node_id = n.nid
+                    WHERE version > 5
+                    ORDER BY (version, node_story_archive_history.id) DESC
+            |]
+
+
+-- | Store every patch of the given history in @node_story_archive_history@
+-- under the given node id and version.  Patches are handled in list order:
+-- for each one, every term is first upserted into @ngrams@ to obtain its
+-- id, then all rows of that patch are inserted in one batch.
+insertNodeArchiveHistory :: PGS.Connection -> NodeId -> Version -> [NgramsStatePatch'] -> IO ()
+insertNodeArchiveHistory c nodeId version = mapM_ insertPatch
+  where
+    insertPatch :: NgramsStatePatch' -> IO ()
+    insertPatch h = do
+      let entries = [ (nType, term, p)
+                    | (nType, NgramsTablePatch patch) <- PM.toList h
+                    , (term, p) <- PM.toList patch
+                    ] :: [(TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
+      rows <- mapM resolve entries
+      _ <- runPGSExecuteMany c query rows
+      pure ()
+
+    -- Upsert the term and build the history row carrying its ngrams id.
+    resolve :: (TableNgrams.NgramsType, NgramsTerm, NgramsPatch)
+            -> IO (NodeId, TableNgrams.NgramsType, Int, NgramsPatch, Version)
+    resolve (nType, term, p) = do
+      [PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
+      pure (nodeId, nType, ngramsId, p, version)
+
+    qInsert :: PGS.Query
+    qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
+                  ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
+                  RETURNING id|]
+
+    -- https://stackoverflow.com/questions/39224438/postgresql-insert-if-foreign-key-exists
+    query :: PGS.Query
+    query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch, version)
+                VALUES (?, ?, ?, ?, ?)
+                |]
+
+      
+-- | Query selecting @version@, @ngrams_type_id@, @terms@ and
+-- @ngrams_repo_element@ from @node_stories@ joined with @ngrams@.
+-- It takes a single parameter: the @node_id@ to filter on.
+nodeStoriesQuery :: PGS.Query
+nodeStoriesQuery = [sql| SELECT version, ngrams_type_id, terms, ngrams_repo_element
+                           FROM node_stories
+                           JOIN ngrams ON ngrams.id = ngrams_id
+                           WHERE node_id = ?
+                           |]
+
+
+-- Archive
+
+
+-- | Insert every element of the archive state list into @node_stories@
+-- for the given node and version.  The element's own term, its root (if
+-- any) and all of its children are upserted into @ngrams@ beforehand.
+insertArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
+insertArchiveStateList c nodeId version = mapM_ insertOne
+  where
+    insertOne (ngramsType, ngrams, ngramsRepoElement) = do
+      [PGS.Only ngramsId] <- upsertTerm ngrams
+      _ <- maybe (pure []) upsertTerm (ngramsRepoElement ^. nre_root)
+      mapM_ upsertTerm (ngramsRepoElement ^. nre_children)
+      runPGSExecute c query (nodeId, ngramsId, version, ngramsType, ngramsRepoElement)
+
+    -- Upsert a term into @ngrams@ and return the resulting id rows.
+    upsertTerm :: NgramsTerm -> IO [PGS.Only Int]
+    upsertTerm t = runPGSReturning c qInsert [PGS.Only t]
+
+    qInsert :: PGS.Query
+    qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
+                  ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
+                  RETURNING id|]
+
+    query :: PGS.Query
+    query = [sql|INSERT INTO node_stories(node_id, ngrams_id, version, ngrams_type_id, ngrams_repo_element)
+                VALUES (?, ?, ?, ?, ? :: jsonb)
+                |]
+
+-- | Delete from @node_stories@ the rows of the given node that match the
+-- ngrams type and term of each element of the list.
+deleteArchiveStateList :: PGS.Connection -> NodeId -> ArchiveStateList -> IO ()
+deleteArchiveStateList c nodeId = mapM_ deleteOne
+  where
+    -- The repo element itself is not needed to identify the row.
+    deleteOne (nt, n, _) = runPGSExecute c query (nodeId, nt, n)
+
+    query :: PGS.Query
+    query = [sql| DELETE FROM node_stories
+                WHERE node_id = ? AND ngrams_type_id = ?
+                  AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
+                  |]
+
+-- | For each element of the list, set @ngrams_repo_element@ and @version@
+-- on the @node_stories@ rows of the given node that match the element's
+-- ngrams type and term.
+updateArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
+updateArchiveStateList c nodeId version = mapM_ updateOne
+  where
+    -- Parameter order follows the placeholders of the query below.
+    updateOne (nt, n, nre) = runPGSExecute c query (nre, version, nodeId, nt, n)
+
+    query :: PGS.Query
+    query = [sql| UPDATE node_stories
+                SET ngrams_repo_element = ?, version = ?
+                WHERE node_id = ? AND ngrams_type_id = ?
+                  AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
+                  |]
+
+
+-- | Set the @version@ column of @node_stories@ to the archive's version,
+-- once per ngrams type present in the archive's state, for the given node.
+updateNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
+updateNodeStoryVersion c nodeId newArchive =
+  mapM_ bumpVersion (Map.keys (newArchive ^. a_state))
+  where
+    newVersion = newArchive ^. a_version
+
+    bumpVersion nt = runPGSExecute c query (newVersion, nodeId, nt)
+
+    query :: PGS.Query
+    query = [sql|UPDATE node_stories
+                SET version = ?
+                WHERE node_id = ?
+                AND ngrams_type_id = ?|]
--- a/src/Gargantext/Database/Action/Flow.hs
+++ b/src/Gargantext/Database/Action/Flow.hs
--- a/src/Gargantext/Database/Action/Flow/Extract.hs
+++ b/src/Gargantext/Database/Action/Flow/Extract.hs
+{-|
+Module      : Gargantext.Database.Action.Flow.Extract
+Description : Database Flow
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+-}
+
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+{-# LANGUAGE InstanceSigs            #-}
+
+
+module Gargantext.Database.Action.Flow.Extract
+    where
+
+import Control.Lens ((^.), _Just, view)
+import Data.HashMap.Strict qualified as HashMap
+import Data.Map.Strict qualified as DM
+import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP)) 
+import Gargantext.Core.Text (HasText(..))
+import Gargantext.Core.Text.Corpus.Parsers (splitOn)
+import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang)
+import Gargantext.Core.Types (POS(NP), TermsCount)
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact, HyperdataDocument, cw_lastName, hc_who, hd_authors, hd_bdd, hd_institutes, hd_source)
+import Gargantext.Database.Admin.Types.Node
+import Gargantext.Database.Prelude (DBCmd)
+import Gargantext.Database.Schema.Ngrams
+import Gargantext.Database.Schema.Node (NodePoly(..))
+import Gargantext.Prelude
+
+
+
+------------------------------------------------------------------------
+-- | Orphan instance: the only ngrams extracted from a contact are 'Authors'
+-- ngrams built from the contact's last name, or from the literal
+-- @"Nothing"@ when no last name is found.
+instance ExtractNgramsT HyperdataContact
+  where
+    extractNgramsT _ncs _l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extracted
+      where
+        extracted :: DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
+        extracted = pure $ HashMap.fromList
+                      [ (SimpleNgrams author, (DM.singleton Authors 1, 1)) | author <- authors ]
+
+        authors  = map text2ngrams $ maybe ["Nothing"] (: []) lastName
+        lastName = view (hc_who . _Just . cw_lastName) hc
+
+
+-- | Orphan instance extracting ngrams from a document: its source,
+-- institutes and authors as 'SimpleNgrams', plus the terms extracted from
+-- its text as 'EnrichedNgrams'.  Every resulting key goes through
+-- @cleanExtractedNgrams 255@ (255 is presumably a maximum length — TODO
+-- confirm against 'cleanExtractedNgrams').
+instance ExtractNgramsT HyperdataDocument
+  where
+    extractNgramsT :: NLPServerConfig
+                   -> TermType Lang
+                   -> HyperdataDocument
+                   -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
+    extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
+      where
+        extractNgramsT' :: HyperdataDocument
+                        -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
+        extractNgramsT' doc = do
+          -- Missing fields fall back to the literal "Nothing".
+          let source    = text2ngrams
+                        $ maybe "Nothing" identity
+                        $ doc ^. hd_source
+
+              -- Institutes and authors are split with 'splitOn', which also
+              -- receives the document's @hd_bdd@ value.
+              institutes = map text2ngrams
+                         $ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
+                         $ doc ^. hd_institutes
+
+              authors    = map text2ngrams
+                         $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
+                         $ doc ^. hd_authors
+
+          -- Terms extracted from the document text ('hasText'), each passed
+          -- through 'enrichedTerms' with 'CoreNLP' and 'NP' and kept with its count.
+          termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt))
+                              <$> concat
+                              <$> liftBase (extractTerms ncs lang $ hasText doc)
+
+          -- Only the extracted terms carry their own count; the metadata
+          -- ngrams all use 1.
+          pure $ HashMap.fromList
+               $  [(SimpleNgrams source, (DM.singleton Sources     1, 1))                    ]
+               <> [(SimpleNgrams     i', (DM.singleton Institutes  1, 1)) | i' <- institutes ]
+               <> [(SimpleNgrams     a', (DM.singleton Authors     1, 1)) | a' <- authors    ]
+               <> [(EnrichedNgrams   t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts'     ]
+
+-- | A 'Node' delegates ngrams extraction to its hyperdata.
+instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
+  where
+    extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h
+
+
+-- | The text of a 'Node' is the text of its hyperdata.
+instance HasText a => HasText (Node a)
+  where
+    hasText (Node { _node_hyperdata = h }) = hasText h
+
+
+-- Apparently unused functions
+
+-- extractInsert :: ( HasNodeStory env err m
+--                  , HasNLPServer env )
+--               => [Node HyperdataDocument] -> m ()
+-- extractInsert docs = do
+--   let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
+--   let lang = EN
+--   ncs <- view $ nlpServerGet lang
+--   mapNgramsDocs' <- mapNodeIdNgrams
+--                 <$> documentIdWithNgrams
+--                     (extractNgramsT ncs $ withLang (Multi lang) documentsWithId)
+--                     documentsWithId
+--   _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
+--   pure ()
+
+
--- a/src/Gargantext/Database/Action/Flow/Types.hs
+++ b/src/Gargantext/Database/Action/Flow/Types.hs
@@ -9,29 +9,40 @@ Portability : POSIX

 -}

-{-# OPTIONS_GHC -fno-warn-orphans    #-}
-
 {-# LANGUAGE ConstraintKinds         #-}
 {-# LANGUAGE ConstrainedClassMethods #-}
 {-# LANGUAGE ConstraintKinds         #-}
 {-# LANGUAGE InstanceSigs            #-}
+{-# LANGUAGE TemplateHaskell         #-}

 module Gargantext.Database.Action.Flow.Types
    where

+import Conduit (ConduitT)
+import Control.Lens (makeLenses)
 import Data.Aeson (ToJSON)
-
-import Gargantext.Core.Types (HasValidationError)
+import Data.Aeson.TH (deriveJSON)
+import Data.HashMap.Strict (HashMap)
+import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
 import Gargantext.Core.Flow.Types
-import Gargantext.Core.Text
 import Gargantext.Core.NodeStory
+import Gargantext.Core.Text
+import Gargantext.Core.Text.Corpus.API qualified as API
 import Gargantext.Core.Text.Terms
-import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
+import Gargantext.Core.Types (HasValidationError, TermsCount)
+import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
+import Gargantext.Database.Admin.Types.Node (NodeId)
 import Gargantext.Database.Prelude (CmdM)
 import Gargantext.Database.Query.Table.Node.Document.Insert
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
 import Gargantext.Database.Query.Tree.Error (HasTreeError)
+import Gargantext.Database.Schema.Ngrams (NgramsType(..))
+import Gargantext.Database.Types (Indexed)
+import Gargantext.Prelude
 import Gargantext.System.Logging

+
 type FlowCmdM env err m =
  ( CmdM     env err m
  , HasNodeStory env err m
@@ -56,3 +67,27 @@ type FlowInsertDB a = ( AddUniqId a
                      , UniqParameters a
                      , InsertDb  a
                      )
+
+
+-- | A document indexed by its 'NodeId', paired with a map from keys @b@ to per-'NgramsType' 'Int' values and a 'TermsCount'.
+data DocumentIdWithNgrams a b =
+     DocumentIdWithNgrams
+     { documentWithId :: Indexed NodeId a
+     , documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
+     } deriving (Show)
+
+-- | Origin of data, internal or external; both constructors carry an 'API.ExternalAPIs' value in @_do_api@.
+-- TODO use internal with API name (could be old data)
+data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
+                | ExternalOrigin { _do_api :: API.ExternalAPIs }
+               -- TODO Web
+  deriving (Generic, Eq)
+
+makeLenses ''DataOrigin
+deriveJSON (unPrefix "_do_") ''DataOrigin
+instance ToSchema DataOrigin where
+  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
+-- | Either a list of existing node ids ('DataOld') or a conduit producing documents with a @Maybe Integer@ ('DataNew'; presumably the expected document count — TODO confirm).
+data DataText = DataOld ![NodeId]
+              | DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
+              --- | DataNew ![[HyperdataDocument]]
--- a/src/Gargantext/Database/Action/Flow/Utils.hs
+++ b/src/Gargantext/Database/Action/Flow/Utils.hs
@@ -9,30 +9,47 @@ Portability : POSIX

 -}

+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+{-# LANGUAGE InstanceSigs            #-}
+

 module Gargantext.Database.Action.Flow.Utils
-    where
+  ( docNgrams
+  , documentIdWithNgrams
+  , insertDocNgrams
+  , insertDocs
+  , mapNodeIdNgrams )
+where

 import Control.Lens ((^.))
 import Data.HashMap.Strict (HashMap)
 import Data.HashMap.Strict qualified as HashMap
+import Data.List qualified as List
 import Data.Map.Strict qualified as DM
+import Data.Text qualified as T
+import Gargantext.API.Ngrams.Types qualified as NT
+import Gargantext.Core (Lang, toDBid)
+import Gargantext.Core.Flow.Types (UniqId, uniqId)
+import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
 import Gargantext.Core.Types (TermsCount)
+import Gargantext.Core.Utils (addTuples)
+import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
+import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, hd_abstract, hd_title)
 import Gargantext.Database.Admin.Types.Node
-import Gargantext.Database.Prelude (DBCmd)
+import Gargantext.Database.Prelude (DBCmd, DbCmd')
 import Gargantext.Database.Query.Table.ContextNodeNgrams
+import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
+import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
+import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
+import Gargantext.Database.Schema.Context (context_hyperdata, context_id)
 import Gargantext.Database.Schema.Ngrams
 import Gargantext.Database.Types
 import Gargantext.Prelude
-import Gargantext.Core (toDBid)
+import Gargantext.Prelude.Crypto.Hash (Hash)


-data DocumentIdWithNgrams a b =
-     DocumentIdWithNgrams
-     { documentWithId :: Indexed NodeId a
-     , documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
-     } deriving (Show)
-
 insertDocNgrams :: ListId
                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
                -> DBCmd err Int
@@ -52,3 +69,122 @@ insertDocNgrams lId m = do

 -- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
 -- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
+
+
+
+-- | Pair every term matched in the document's title and abstract with the
+-- same singleton map: ngrams type @nt@ to (the document's context id to 1).
+docNgrams :: Lang
+          -> NgramsType
+          -> [NT.NgramsTerm]
+          -> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
+          -> [((MatchedText, TermsCount),
+                Map NgramsType (Map NodeId Int))]
+docNgrams lang nt ts doc = [ (matched, occurrence) | matched <- matches ]
+  where
+    -- Title and abstract (whichever are present), one per line.
+    docText    = T.unlines $ catMaybes
+                   [ doc ^. context_hyperdata . hd_title
+                   , doc ^. context_hyperdata . hd_abstract
+                   ]
+    matches    = termsInText lang (buildPatternsWith lang ts) docText
+    occurrence = DM.fromList [(nt, DM.singleton (doc ^. context_id) 1)]
+
+-- | Run the given extraction action on each indexed document, keeping the document next to its result.
+documentIdWithNgrams :: HasNodeError err
+                     => (a
+                     -> DBCmd err (HashMap.HashMap b (Map NgramsType Int, TermsCount)))
+                     -> [Indexed NodeId a]
+                     -> DBCmd err [DocumentIdWithNgrams a b]
+documentIdWithNgrams f docs =
+  traverse withNgrams docs
+  where
+    withNgrams indexedDoc =
+      DocumentIdWithNgrams indexedDoc <$> f (_unIndex indexedDoc)
+
+
+-- | Merge the per-document ngrams into a single map, combining clashes with 'addTuples'. TODO check optimization
+mapNodeIdNgrams :: (Ord b, Hashable b)
+                => [DocumentIdWithNgrams a b]
+                -> HashMap.HashMap b
+                       (Map NgramsType
+                            (Map NodeId (Int, TermsCount))
+                       )
+mapNodeIdNgrams docs = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) (fmap perDocument docs)
+  where
+    -- NOTE We are somehow multiplying 'TermsCount' here: if the
+    -- same ngrams term has different ngrams types, the 'TermsCount'
+    -- for it (which is the number of times the term appears in a
+    -- document) is copied over to all its types.
+    perDocument :: DocumentIdWithNgrams a b
+                -> HashMap.HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
+    perDocument d = fmap spread (documentNgrams d)
+      where
+        spread (typeMap, cnt) = fmap (\i -> DM.singleton (_index $ documentWithId d) (i, cnt)) typeMap
+
+-- | Insert the documents (after 'addUniqId'), pass the new ids with the corpus id to 'Doc.add', and return those ids plus the documents reported as inserted.
+-- TODO Type NodeDocumentUnicised
+insertDocs :: ( DbCmd' env err m
+              -- , FlowCorpus a
+              , FlowInsertDB a
+              , HasNodeError err
+              )
+              => UserId
+              -> CorpusId
+              -> [a]
+              -> m ([ContextId], [Indexed ContextId a])
+insertDocs uId cId hs = do
+  let docs = map addUniqId hs
+  newIds <- insertDb uId Nothing docs
+  -- printDebug "newIds" newIds
+  let
+    newIds' = map (nodeId2ContextId . reId) newIds
+    documentsWithId = mergeData (toInserted newIds) (DM.fromList $ map viewUniqId' docs)
+  _ <- Doc.add cId newIds'
+  pure (newIds', map (first nodeId2ContextId) documentsWithId)
+
+------------------------------------------------------------------------
+-- | Pair a document with its unique hash; panics when 'uniqId' is 'Nothing'.
+viewUniqId' :: UniqId a
+            => a
+            -> (Hash, a)
+viewUniqId' d = case d ^. uniqId of
+  Just h  -> (h, d)
+  Nothing -> panicTrace "[ERROR] Database.Flow.toInsert"
+
+-- | Index each document by the id of the 'ReturnId' stored under the same hash; documents without a match are dropped.
+mergeData :: Map Hash ReturnId
+          -> Map Hash a
+          -> [Indexed NodeId a]
+mergeData rs docs =
+  [ Indexed (reId returned) hpd
+  | (sha, hpd)    <- DM.toList docs
+  , Just returned <- [DM.lookup sha rs]
+  ]
+
+
+
+-- | Map from unique hash to 'ReturnId', keeping only the entries whose 'reInserted' flag is set.
+toInserted :: [ReturnId]
+           -> Map Hash ReturnId
+toInserted returned =
+  DM.fromList
+    [ (reUniqId r, r) | r <- returned, reInserted r ]
+
+
+
+-- Apparently unused functions
+
+
+-- | TODO put elsewhere
+-- | Upgrade function
+-- Suppose all documents are English (this is the case actually)
+-- indexAllDocumentsWithPosTag :: ( HasNodeStory env err m
+--                                , HasNLPServer env )
+--                             => m ()
+-- indexAllDocumentsWithPosTag = do
+--   rootId    <- getRootId (UserName userMaster)
+--   corpusIds <- findNodesId rootId [NodeCorpus]
+--   docs      <- List.concat <$> mapM getDocumentsWithParentId corpusIds
+--   _ <- mapM extractInsert (splitEvery 1000 docs)
+--   pure ()
--- a/src/Gargantext/Database/Action/Metrics.hs
+++ b/src/Gargantext/Database/Action/Metrics.hs
@@ -30,7 +30,7 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
 import Gargantext.Core (HasDBid(toDBid))
-import Gargantext.Core.NodeStory hiding (runPGSQuery)
+import Gargantext.Core.NodeStory
 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
 import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId, contextId2NodeId)
 import Gargantext.Core.Types.Query (Limit(..))
@@ -74,7 +74,8 @@ getNgramsCooc cId lId tabType maybeLimit = do
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 updateNgramsOccurrences :: (HasNodeStory env err m)
-                        => CorpusId -> ListId
+                        => CorpusId
+                        -> ListId
                        -> m ()
 updateNgramsOccurrences cId lId = do
  _ <- mapM (updateNgramsOccurrences' cId lId Nothing) [Terms, Sources, Authors, Institutes]