[ngrams] first version of ngrams counting in docs

3b163685 · Przemyslaw Kaminski · 0d6a5eaa · 3b163685 · 3b163685 · 3b163685
Commit 3b163685 authored Oct 27, 2022 by Przemyslaw Kaminski
34 changed files
--- a/README.md
+++ b/README.md
@@ -196,6 +196,8 @@ To build documentation, run:
 stack --docker build --haddock --no-haddock-deps --fast
 ```

+(in `.stack-work/dist/x86_64-linux-nix/Cabal-3.2.1.0/doc/html/gargantext`).
+
 ## GraphQL

 Some introspection information.

--- a/bin/gargantext-upgrade/Main.hs
+++ b/bin/gargantext-upgrade/Main.hs
@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
-import Gargantext.Database.Query.Table.Node (getOrMkList)
 import Gargantext.Prelude
 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
 import Prelude (getLine)
@@ -67,7 +66,7 @@ main = do
  _ok  <- getLine

  cfg       <- readConfig         iniPath
-  let secret = _gc_secretkey cfg
+  let _secret = _gc_secretkey cfg

  withDevEnv iniPath $ \env -> do
    -- First upgrade the Database Schema

--- a/devops/docker/docker-compose.yaml
+++ b/devops/docker/docker-compose.yaml
@@ -24,7 +24,7 @@ services:
    network_mode: host
    #command: ["postgres", "-c", "log_statement=all"]
    #ports:
-      #- 5432:5432
+    #  - 5432:5432
    environment:
      POSTGRES_USER: gargantua
      POSTGRES_PASSWORD: C8kdcUrAQy66U

--- a/devops/postgres/schema.sql
+++ b/devops/postgres/schema.sql
@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
    ngrams_id     INTEGER NOT NULL REFERENCES public.ngrams   (id) ON DELETE CASCADE,
    ngrams_type   INTEGER  ,
    weight double precision,
+    doc_count     INTEGER,
    PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
  );
 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;

--- a/devops/postgres/upgrade/0.0.6.7.3.sql
+++ b/devops/postgres/upgrade/0.0.6.7.3.sql
+ALTER TABLE context_node_ngrams
+  ADD COLUMN doc_count INTEGER;
--- a/gargantext.cabal
+++ b/gargantext.cabal
 cabal-version: 1.12

-- This file has been generated from package.yaml by hpack version 0.35.0.
+-- This file has been generated from package.yaml by hpack version 0.34.7.
 --
 -- see: https://github.com/sol/hpack

 name:           gargantext
-version: 0.0.6.8.2
+version:        0.0.6.8.2
 synopsis:       Search, map, share
 description:    Please see README.md
 category:       Data
@@ -50,6 +50,7 @@ library
      Gargantext.Core.Types
      Gargantext.Core.Types.Individu
      Gargantext.Core.Types.Main
+      Gargantext.Core.Utils
      Gargantext.Core.Utils.Prefix
      Gargantext.Utils.Jobs
      Gargantext.Utils.Jobs.API
@@ -87,6 +88,7 @@ library
      Gargantext.Core.Text.Prepare
      Gargantext.Core.Text.Search
      Gargantext.Core.Text.Terms
+      Gargantext.Core.Text.Terms.Eleve
      Gargantext.Core.Text.Terms.Mono
      Gargantext.Core.Text.Terms.Multi.Lang.En
      Gargantext.Core.Text.Terms.Multi.Lang.Fr
@@ -211,7 +213,6 @@ library
      Gargantext.Core.Text.Samples.EN
      Gargantext.Core.Text.Samples.FR
      Gargantext.Core.Text.Samples.SP
-      Gargantext.Core.Text.Terms.Eleve
      Gargantext.Core.Text.Terms.Mono.Stem
      Gargantext.Core.Text.Terms.Mono.Stem.En
      Gargantext.Core.Text.Terms.Mono.Token
@@ -221,7 +222,6 @@ library
      Gargantext.Core.Text.Terms.Multi.PosTagging
      Gargantext.Core.Text.Terms.Multi.PosTagging.Types
      Gargantext.Core.Types.Phylo
-      Gargantext.Core.Utils
      Gargantext.Core.Utils.DateUtils
      Gargantext.Core.Viz
      Gargantext.Core.Viz.Chart
@@ -809,6 +809,7 @@ test-suite garg-test
      Core.Text
      Core.Text.Examples
      Core.Text.Flow
+      Core.Utils
      Graph.Clustering
      Graph.Distance
      Ngrams.Lang

--- a/package.yaml
+++ b/package.yaml
@@ -74,6 +74,7 @@ library:
  - Gargantext.Core.Types
  - Gargantext.Core.Types.Individu
  - Gargantext.Core.Types.Main
+  - Gargantext.Core.Utils
  - Gargantext.Core.Utils.Prefix
  - Gargantext.Utils.Jobs
  - Gargantext.Utils.Jobs.API
@@ -111,6 +112,7 @@ library:
  - Gargantext.Core.Text.Prepare
  - Gargantext.Core.Text.Search
  - Gargantext.Core.Text.Terms
+  - Gargantext.Core.Text.Terms.Eleve
  - Gargantext.Core.Text.Terms.Mono
  - Gargantext.Core.Text.Terms.Multi.Lang.En
  - Gargantext.Core.Text.Terms.Multi.Lang.Fr

--- a/src-test/Core/Text.hs
+++ b/src-test/Core/Text.hs
-
 {-|
 Module      : Graph.Clustering
 Description : Basic tests to avoid quick regression

--- a/src-test/Core/Utils.hs
+++ b/src-test/Core/Utils.hs
+{-|
+Module      : Core.Utils
+Description :
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+
+-}
+
+module Core.Utils where
+
+import Test.Hspec
+
+import Gargantext.Prelude
+
+import Gargantext.Core.Utils
+
+-- | Core.Utils tests
+test :: IO ()
+test = hspec $ do
+  describe "check if groupWithCounts works" $ do
+    it "simple integer array" $ do
+      (groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)]
+
+    it "string" $ do
+      (groupWithCounts "abccba") `shouldBe` [('a', 2), ('b', 2), ('c', 2)]
--- a/src-test/Graph/Clustering.hs
+++ b/src-test/Graph/Clustering.hs
-
 {-|
 Module      : Graph.Clustering
 Description : Basic tests to avoid quick regression
@@ -40,4 +39,3 @@ test = hspec $ do
      let
        result = List.length partitions > 1
      shouldBe True result
-
--- a/src-test/Main.hs
+++ b/src-test/Main.hs
@@ -11,6 +11,8 @@ Portability : POSIX

 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
+
+import qualified Core.Utils as Utils
 --import qualified Ngrams.Lang.Fr as Fr
 --import qualified Ngrams.Lang as Lang
 import qualified Ngrams.Lang.Occurrences as Occ
@@ -22,11 +24,12 @@ import qualified Utils.Crypto            as Crypto

 main :: IO ()
 main = do
+  Utils.test
 --    Occ.parsersTest
 --    Lang.ngramsExtractionTest FR
 --    Lang.ngramsExtractionTest EN
 --    Metrics.main
-  Graph.test 
+  Graph.test
  PD.testFromRFC3339
 --    GD.test
  Crypto.test
--- a/src-test/Utils/Crypto.hs
+++ b/src-test/Utils/Crypto.hs
-
 {-|
 Module      : Utils.Crypto
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -43,4 +42,3 @@ test = hspec $ do
    let hash2 = hash (["b","a"] :: [Text])
    it "compare" $ do
      hash1 `shouldBe` hash2
-
--- a/src/Gargantext/API/Ngrams.hs
+++ b/src/Gargantext/API/Ngrams.hs
@@ -184,9 +184,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
 saveNodeStory = do
  saver <- view hasNodeStorySaver
  liftBase $ do
-    Gargantext.Prelude.putStrLn "---- Running node story saver ----"
+    --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
    saver
-    Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
+    --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"


 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
@@ -194,9 +194,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
 saveNodeStoryImmediate = do
  saver <- view hasNodeStoryImmediateSaver
  liftBase $ do
-    Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
+    --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
    saver
-    Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
+    --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"


 listTypeConflictResolution :: ListType -> ListType -> ListType

--- a/src/Gargantext/API/Ngrams/List.hs
+++ b/src/Gargantext/API/Ngrams/List.hs
@@ -159,15 +159,15 @@ reIndexWith cId lId nt lts = do
  -- TODO Tests here
  let
    ngramsByDoc = map (HashMap.fromList)
-                $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
-                $ map (\doc -> List.zip
-                                (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
-                                             $ Text.unlines $ catMaybes
-                                               [ doc ^. context_hyperdata . hd_title
-                                               , doc ^. context_hyperdata . hd_abstract
-                                               ]
+                  $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
+                  $ map (\doc -> List.zip
+                                 (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
+                                  $ Text.unlines $ catMaybes
+                                  [ doc ^. context_hyperdata . hd_title
+                                  , doc ^. context_hyperdata . hd_abstract
+                                  ]
                                 )
-                                (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
+                                 (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
                        ) docs

  -- printDebug "ngramsByDoc" ngramsByDoc

--- a/src/Gargantext/API/Ngrams/Tools.hs
+++ b/src/Gargantext/API/Ngrams/Tools.hs
@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m =
                                listToCombi identity ks
              ]

-  where 
+  where
    ks = HM.keys m

 -- TODO k could be either k1 or k2 here
@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
                                -- TODO check optim
                                -- listToCombi identity ks1
              ]
-  where 
+  where
    ks1 = HM.keys m1
    ks2 = HM.keys m2


--- a/src/Gargantext/Core/NodeStory.hs
+++ b/src/Gargantext/Core/NodeStory.hs
@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do
  where
    query :: PGS.Query
    query = [sql| WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?)
-                  INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element) 
+                  INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
                  SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id
            |]

@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
  --printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates

  -- 2. Perform inserts/deletes/updates
-  printDebug "[updateNodeStory] applying insert" ()
+  --printDebug "[updateNodeStory] applying insert" ()
  insertArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList inserts }
-  printDebug "[updateNodeStory] insert applied" ()
+  --printDebug "[updateNodeStory] insert applied" ()
    --TODO Use currentArchive ^. a_version in delete and report error
  -- if entries with (node_id, ngrams_type_id, ngrams_id) but
  -- different version are found.
  deleteArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList deletes }
-  printDebug "[updateNodeStory] delete applied" ()
+  --printDebug "[updateNodeStory] delete applied" ()
  updateArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList updates }
-  printDebug "[updateNodeStory] update applied" ()
+  --printDebug "[updateNodeStory] update applied" ()

  pure ()
  -- where

--- a/src/Gargantext/Core/Text/Terms.hs
+++ b/src/Gargantext/Core/Text/Terms.hs
@@ -28,8 +28,9 @@ compute graph

 -}

-{-# LANGUAGE TemplateHaskell   #-}
 {-# LANGUAGE ConstrainedClassMethods #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TemplateHaskell   #-}

 module Gargantext.Core.Text.Terms
  where
@@ -47,6 +48,7 @@ import qualified Data.Set            as Set
 import qualified Data.Text           as Text
 import qualified Data.HashMap.Strict as HashMap
 import Gargantext.Core
+import Gargantext.Core.Utils (groupWithCounts)
 import Gargantext.Core.Text (sentences, HasText(..))
 import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
 import Gargantext.Core.Text.Terms.Mono  (monoTerms)
@@ -70,24 +72,23 @@ data TermType lang
                 , _tt_model      :: !(Maybe (Tries Token ()))
                 }
  deriving (Generic)
+deriving instance (Show lang) => Show (TermType lang)

 makeLenses ''TermType
 --group :: [Text] -> [Text]
 --group = undefined

 -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ 
+-- map (filter (\t -> not . elem t)) $
 ------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user).
+-- | Sugar to extract terms from text (hidding 'mapM' from end user).
 --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
-extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
-
+extractTerms :: TermType Lang -> [Text] -> IO [[TermsWithCount]]
 extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
  where
    m' = case _tt_model of
      Just m''-> m''
      Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
-
 extractTerms termTypeLang xs = mapM (terms termTypeLang) xs


@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams   { unSimpleNgrams   :: Ngrams       }

 instance Hashable ExtractedNgrams

+-- | A typeclass that represents extracting ngrams from an entity.
 class ExtractNgramsT h
  where
    extractNgramsT :: HasText h
                   => TermType Lang
                   -> h
-                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
 ------------------------------------------------------------------------
 enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
 enrichedTerms l pa po (Terms ng1 ng2) =
@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) =

 ------------------------------------------------------------------------
 cleanNgrams :: Int -> Ngrams -> Ngrams
-cleanNgrams s ng 
+cleanNgrams s ng
      | Text.length (ng ^. ngramsTerms) < s = ng
      | otherwise                           = text2ngrams (Text.take s (ng ^. ngramsTerms))

@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do
  let (s, e) = List.partition isSimpleNgrams ngs
  m1 <- insertNgrams       (map unSimpleNgrams   s)
  --printDebug "others" m1
-  
+
  m2 <- insertNgramsPostag (map unEnrichedNgrams e)
  --printDebug "terms" m2
- 
+
  let result = HashMap.union m1 m2
  pure result

@@ -163,43 +165,48 @@ isSimpleNgrams (SimpleNgrams _) = True
 isSimpleNgrams _                = False

 ------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
-- MonoMulti : mono and multi
+-- | Terms from 'Text'
+-- 'Mono' : mono terms
+-- 'Multi' : multi terms
+-- 'MonoMulti' : mono and multi
 -- TODO : multi terms should exclude mono (intersection is not empty yet)
-terms :: TermType Lang -> Text -> IO [Terms]
-terms (Mono      lang) txt = pure $ monoTerms lang txt
-terms (Multi     lang) txt = multiterms lang txt
-terms (MonoMulti lang) txt = terms (Multi lang) txt
-terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
+terms :: TermType Lang -> Text -> IO [TermsWithCount]
+terms tt txt = do
+  printDebug "[terms] tt" tt
+  printDebug "[terms] txt" txt
+  out <- termsNoLog tt txt
+  printDebug "[terms] out" out
+  pure out
+termsNoLog :: TermType Lang -> Text -> IO [TermsWithCount]
+termsNoLog (Mono      lang) txt = pure $ monoTerms lang txt
+termsNoLog (Multi     lang) txt = multiterms lang txt
+termsNoLog (MonoMulti lang) txt = terms (Multi lang) txt
+termsNoLog (Unsupervised { .. }) txt = pure $ termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
  where
    m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
 -- terms (WithList  list) txt = pure . concat $ extractTermsWithList list txt


 ------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
-
 type WindowSize = Int
 type MinNgramSize = Int

-termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
-termsUnsupervised (Unsupervised l n s m) =
-               pure
-             . map (text2term l)
-             . List.nub
-             . (List.filter (\l' -> List.length l' >= s))
+-- | Unsupervised ngrams extraction
+-- language agnostic extraction
+-- TODO: newtype BlockText
+termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount]
+termsUnsupervised (Unsupervised { _tt_model = Nothing }) = panic "[termsUnsupervised] no model"
+termsUnsupervised (Unsupervised { _tt_model = Just _tt_model, .. }) =
+               map (\(t, cnt) -> (text2term _tt_lang t, cnt))
+             . groupWithCounts
+             -- . List.nub
+             . (List.filter (\l' -> List.length l' >= _tt_windowSize))
             . List.concat
-             . mainEleveWith (maybe (panic "no model") identity m) n
+             . mainEleveWith _tt_model _tt_ngramsSize
             . uniText
 termsUnsupervised _ = undefined


-
 newTries :: Int -> Text -> Tries Token ()
 newTries n t = buildTries n (fmap toToken $ uniText t)

@@ -217,5 +224,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
 isPunctuation :: Text -> Bool
 isPunctuation x = List.elem x $  (Text.pack . pure)
                             <$> ("!?(),;.:" :: String)
-
-
--- a/src/Gargantext/Core/Text/Terms/Eleve.hs
+++ b/src/Gargantext/Core/Text/Terms/Eleve.hs
@@ -32,6 +32,7 @@ Notes for current implementation:

 -}
 {-# LANGUAGE ConstraintKinds   #-}
+{-# LANGUAGE StandaloneDeriving   #-}
 {-# LANGUAGE TemplateHaskell   #-}
 {-# LANGUAGE TypeFamilies      #-}

@@ -278,6 +279,8 @@ data Tries k e = Tries

 makeLenses ''Tries

+deriving instance (Show k, Show e) => Show (Tries k e)
+
 buildTries :: Int -> [[Token]] -> Tries Token ()
 buildTries n sentences = Tries
  { _fwd = buildTrie Forward  n sentences

--- a/src/Gargantext/Core/Text/Terms/Mono.hs
+++ b/src/Gargantext/Core/Text/Terms/Mono.hs
@@ -40,8 +40,8 @@ words = monoTexts
 isSep :: Char -> Bool
 isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))

-monoTerms :: Lang -> Text -> [Terms]
-monoTerms l txt = map (monoText2term l) $ monoTexts txt
+monoTerms :: Lang -> Text -> [TermsWithCount]
+monoTerms l txt = map (\t -> (monoText2term l t, 1)) $ monoTexts txt

 monoTexts :: Text -> [Text]
 monoTexts = L.concat . monoTextsBySentence

--- a/src/Gargantext/Core/Text/Terms/Multi.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi.hs
@@ -21,6 +21,7 @@ import Data.List (concat)
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Types
+import Gargantext.Core.Utils (groupWithCounts)

 import Gargantext.Core.Text.Terms.Multi.PosTagging
 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
 type NLP_API = Lang -> Text -> IO PosSentences

 -------------------------------------------------------------------
-multiterms :: Lang -> Text -> IO [Terms]
-multiterms = multiterms' tokenTag2terms
+multiterms :: Lang -> Text -> IO [TermsWithCount]
+multiterms l txt = do
+  ret <- multiterms' tokenTag2terms l txt
+  pure $ groupWithCounts ret
  where
    multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
-    multiterms' f lang txt = concat
+    multiterms' f lang txt' = concat
                       <$> map (map f)
                       <$> map (filter (\t -> _my_token_pos t == Just NP))
-                       <$> tokenTags lang txt
+                       <$> tokenTags lang txt'

 -------------------------------------------------------------------
 tokenTag2terms :: TokenTag -> Terms
@@ -57,10 +60,10 @@ tokenTags l  _   = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <

 tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
 tokenTagsWith lang txt nlp = map (groupTokens lang)
-                          <$> map tokens2tokensTags
-                          <$> map _sentenceTokens
-                          <$> _sentences
-                          <$> nlp lang txt
+                         <$> map tokens2tokensTags
+                         <$> map _sentenceTokens
+                         <$> _sentences
+                         <$> nlp lang txt


 ---- | This function analyses and groups (or not) ngrams according to

--- a/src/Gargantext/Core/Text/Terms/Multi/Group.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/Group.hs
@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
 group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
 group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
 group2 _ _ [] = []
-
--- a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
 --groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs)         = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
 --
 --groupNgrams (x:xs)                                          = (x:(groupNgrams xs))
-
--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag]
 tokens2tokensTags ts = filter' $ map tokenTag ts
 ------------------------------------------------------------------------
 tokenTag :: Token -> TokenTag
-tokenTag (Token _ w _ l _ _ p n _ _) = TokenTag w' l' p n
+tokenTag (Token { .. }) = TokenTag { _my_token_word = w'
+                                   , _my_token_lemma = l'
+                                   , _my_token_pos = _tokenPos
+                                   , _my_token_ner = _tokenNer }
  where
-    w' = split w
-    l' = fromList (split l)
+    w' = split _tokenWord
+    l' = fromList (split _tokenLemma)
    split = splitOn (pack " ") . toLower

 filter' :: [TokenTag] -> [TokenTag]
 filter' xs = filter isNgrams xs
    where
-      isNgrams (TokenTag _ _ p n) = isJust p || isJust n
+      isNgrams (TokenTag { .. }) = isJust _my_token_pos || isJust _my_token_ner

 ------------------------------------------------------------------------
-- request = 
+-- request =
 -- "fr" : {
 --                 "tokenize.language" : "fr",
 --                 "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs
 --                 "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
 --                 "depparse.language" : "french",
 --                 "ner.model":  DATA_ROOT+"/eunews.fr.crf.gz",
--                 "ssplit.newlineIsSentenceBreak": "always" 
+--                 "ssplit.newlineIsSentenceBreak": "always"
 --             },
-- 
+--

 corenlp' :: ( FromJSON a
            , ConvertibleStrings p ByteString
@@ -80,6 +83,7 @@ corenlp' lang txt = do
            FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
            _  -> panic $ pack "not implemented yet"
    url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
+    -- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
    let request = setRequestBodyLBS (cs txt) url
    httpJSON request


--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
  deriving (Show, Generic)

 $(deriveJSON (unPrefix "_") ''PosSentences)
-
--- a/src/Gargantext/Core/Text/Terms/WithList.hs
+++ b/src/Gargantext/Core/Text/Terms/WithList.hs
 {-|
 Module      : Gargantext.Core.Text.Terms.WithList
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
 import Gargantext.Prelude
 import Gargantext.Core.Text.Context
 import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
+import Gargantext.Core.Types (TermsCount)
+import Gargantext.Core.Utils (groupWithCounts)
 import Prelude (error)
 import qualified Data.Algorithms.KMP as KMP
 import qualified Data.IntMap.Strict  as IntMap
@@ -72,8 +74,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
 -- Utils
 type BlockText   = Text
 type MatchedText = Text
-termsInText :: Patterns -> BlockText -> [MatchedText]
-termsInText pats txt = List.nub
+termsInText :: Patterns -> BlockText -> [(MatchedText, TermsCount)]
+termsInText pats txt = groupWithCounts
                     $ List.concat
                     $ map (map unwords)
                     $ extractTermsWithList pats txt
@@ -96,7 +98,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
 {- | Not used
 filterWith :: TermList
           -> (a -> Text)
-           -> [a] 
+           -> [a]
           -> [(a, [Text])]
 filterWith termList f xs = filterWith' termList f zip xs

@@ -104,7 +106,7 @@ filterWith termList f xs = filterWith' termList f zip xs
 filterWith' :: TermList
           -> (a -> Text)
           -> ([a] -> [[Text]] -> [b])
-           -> [a] 
+           -> [a]
           -> [b]
 filterWith' termList f f' xs = f' xs
                            $ map (extractTermsWithList' pats)

--- a/src/Gargantext/Core/Types.hs
+++ b/src/Gargantext/Core/Types.hs
 {-|
 Module      : Gargantext.Types
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -17,7 +17,7 @@ commentary with @some markup@.
 module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
                             , module Gargantext.Database.Admin.Types.Node
                             , DebugMode(..), withDebugMode
-                             , Term, Terms(..)
+                             , Term, Terms(..), TermsCount, TermsWithCount
                             , TokenTag(..), POS(..), NER(..)
                             , Label, Stems
                             , HasInvalidError(..), assertValid
@@ -70,10 +70,13 @@ type Label = [Text]
 data Terms = Terms { _terms_label :: Label
                   , _terms_stem  :: Stems
                   } deriving (Ord, Show)
-
 instance Eq Terms where
  (==) (Terms _ s1) (Terms _ s2) = s1 == s2

+type TermsCount = Int
+
+type TermsWithCount = (Terms, TermsCount)
+
 ------------------------------------------------------------------------
 data Tag = POS | NER
  deriving (Show, Eq)
@@ -208,5 +211,3 @@ data TODO = TODO
 instance ToSchema TODO where
 instance ToParamSchema TODO where
 ----------------------------------------------------------------------------
-
-
--- a/src/Gargantext/Core/Utils.hs
+++ b/src/Gargantext/Core/Utils.hs
 {-|
 Module      : Gargantext.Utils
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -12,16 +12,19 @@ commentary with @some markup@.
 -}


-module Gargantext.Core.Utils ( 
+module Gargantext.Core.Utils (
                           -- module Gargantext.Utils.Chronos
                             module Gargantext.Core.Utils.Prefix
                           , something
                           , alphanum
                           , choices
                           , randomString
+                           , groupWithCounts
+                           , addTuples
                          ) where

 import Data.Char (chr, ord)
+import qualified Data.List as List
 import Data.Maybe
 import Data.Monoid
 import Data.Text (Text, pack)
@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
 randomString num = do
  str <- choices num alphanum
  pure $ pack str
+
+
+-- | Given a list of items of type 'a', return list with unique items
+-- (like List.nub) but tuple-d with their counts in the original list
+groupWithCounts :: (Ord a, Eq a) => [a] -> [(a, Int)]
+groupWithCounts = map f
+                . List.group
+                . List.sort
+  where
+    f [] = panic "[groupWithCounts] impossible"
+    f ts@(t:_) = (t, length ts)
+
+addTuples :: (Num a, Num b) => (a, b) -> (a, b) -> (a, b)
+addTuples (a1, b1) (a2, b2) = (a1 + a2, b1 + b2)
--- a/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+++ b/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
@@ -65,7 +65,7 @@ flowPhylo cId = do
    patterns = buildPatterns termList
    -- | To filter the Ngrams of a document based on the termList
    filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
-    filterTerms patterns' (y,d) = (y,termsInText patterns' d)
+    filterTerms patterns' (y,d) = (y, fst <$> termsInText patterns' d)

    docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'

@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
 -- refactor 2021
 -- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
 -- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
-
--- a/src/Gargantext/Database/Action/Flow.hs
+++ b/src/Gargantext/Database/Action/Flow.hs
@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
    where

 import Conduit
-import Control.Lens ((^.), view, _Just, makeLenses)
+import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
 import Data.Aeson.TH (deriveJSON)
 import Data.Conduit.Internal (zipSources)
 import Data.Either
@@ -60,7 +60,6 @@ import Data.Maybe (catMaybes)
 import Data.Monoid
 import Data.Swagger
 import qualified Data.Text as T
-import Data.Traversable (traverse)
 import Data.Tuple.Extra (first, second)
 import GHC.Generics (Generic)
 import Servant.Client (ClientError)
@@ -83,9 +82,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
 import Gargantext.Core.Text.Terms
 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
-import Gargantext.Core.Types (POS(NP))
+import Gargantext.Core.Types (POS(NP), TermsCount)
 import Gargantext.Core.Types.Individu (User(..))
 import Gargantext.Core.Types.Main
+import Gargantext.Core.Utils (addTuples)
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 import Gargantext.Database.Action.Flow.List
 import Gargantext.Database.Action.Flow.Types
@@ -284,7 +284,7 @@ flow c u cn la mfslw (mLength, docsC) logStatus = do
                           , _scst_events    = Just []
                           }
      pure $ Prelude.head id
-      
+


 ------------------------------------------------------------------------
@@ -357,25 +357,27 @@ insertMasterDocs c lang hs  =  do
  -- this will enable global database monitoring

  -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
-  mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
+  mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                <- mapNodeIdNgrams
                <$> documentIdWithNgrams
-                    (extractNgramsT $ withLang lang documentsWithId)
-                    documentsWithId
+                      (extractNgramsT $ withLang lang documentsWithId)
+                      documentsWithId

  lId      <- getOrMkList masterCorpusId masterUserId
+  -- _ <- saveDocNgramsWith lId mapNgramsDocs'
  _ <- saveDocNgramsWith lId mapNgramsDocs'

  -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
  pure ids'

-saveDocNgramsWith :: ( FlowCmdM env err m)
+saveDocNgramsWith :: (FlowCmdM env err m)
                  => ListId
-                  -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
+                  -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                  -> m ()
 saveDocNgramsWith lId mapNgramsDocs' = do
-  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
-  --printDebug "terms2id" terms2id
+  printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
+  let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
+  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount

  let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

@@ -392,7 +394,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
                                            <*> Just (fromIntegral w :: Double)
                       | (terms'', mapNgramsTypes)      <- HashMap.toList mapNgramsDocs
                       , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
-                       , (nId, w)                       <- Map.toList mapNodeIdWeight
+                       , (nId, (w, _cnt))                       <- Map.toList mapNodeIdWeight
                       ]

  -- to be removed
@@ -451,28 +453,32 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
 ------------------------------------------------------------------------
 documentIdWithNgrams :: HasNodeError err
                     => (a
-                     -> Cmd err (HashMap b (Map NgramsType Int)))
+                     -> Cmd err (HashMap b (Map NgramsType Int, TermsCount)))
                     -> [Indexed NodeId a]
                     -> Cmd err [DocumentIdWithNgrams a b]
 documentIdWithNgrams f = traverse toDocumentIdWithNgrams
  where
    toDocumentIdWithNgrams d = do
      e <- f $ _unIndex         d
-      pure   $ DocumentIdWithNgrams d e
+      pure $ DocumentIdWithNgrams d e


 -- | TODO check optimization
 mapNodeIdNgrams :: (Ord b, Hashable b)
                => [DocumentIdWithNgrams a b]
                -> HashMap b
-                       (Map NgramsType 
-                            (Map NodeId Int)
+                       (Map NgramsType
+                            (Map NodeId (Int, TermsCount))
                       )
-mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
+mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith addTuples)) . fmap f
  where
+    -- | NOTE We are somehow multiplying 'TermsCount' here: If the
+    -- same ngrams term has different ngrams types, the 'TermsCount'
+    -- for it (which is the number of times the terms appears in a
+    -- document) is copied over to all its types.
    f :: DocumentIdWithNgrams a b
-      -> HashMap b (Map NgramsType (Map NodeId Int))
-    f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d
+      -> HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
+    f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> Map.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
      where
        nId = _index $ documentWithId d

@@ -483,25 +489,25 @@ instance ExtractNgramsT HyperdataContact
    extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
      where
        extract :: TermType Lang -> HyperdataContact
-                -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
        extract _l hc' = do
          let authors = map text2ngrams
                      $ maybe ["Nothing"] (\a -> [a])
                      $ view (hc_who . _Just . cw_lastName) hc'

-          pure $ HashMap.fromList $ [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
+          pure $ HashMap.fromList $ [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]


 instance ExtractNgramsT HyperdataDocument
  where
    extractNgramsT :: TermType Lang
                   -> HyperdataDocument
-                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
    extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd
      where
        extractNgramsT' :: TermType Lang
                        -> HyperdataDocument
-                       -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                       -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
        extractNgramsT' lang' doc = do
          let source    = text2ngrams
                        $ maybe "Nothing" identity
@@ -515,23 +521,25 @@ instance ExtractNgramsT HyperdataDocument
                         $ maybe ["Nothing"] (T.splitOn ", ")
                         $ _hd_authors doc

-          terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
-                 <$> concat
-                 <$> liftBase (extractTerms lang' $ hasText doc)
+          termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang' ^. tt_lang) CoreNLP NP t, cnt))
+                              <$> concat
+                              <$> liftBase (extractTerms lang' $ hasText doc)
+          printDebug "[extractNgramsT HyperdataDocument] termsWithCounts'" termsWithCounts'
+          printDebug "[extractNgramsT HyperdataDocument] termsWithLargerCounts" $ filter (\(_, cnt) -> cnt > 1) termsWithCounts'

          pure $ HashMap.fromList
-               $  [(SimpleNgrams source, Map.singleton Sources     1)                    ]
-               <> [(SimpleNgrams     i', Map.singleton Institutes  1) | i' <- institutes ]
-               <> [(SimpleNgrams     a', Map.singleton Authors     1) | a' <- authors    ]
-               <> [(EnrichedNgrams   t', Map.singleton NgramsTerms 1) | t' <- terms'     ]
+               $  [(SimpleNgrams source, (Map.singleton Sources     1, 1))                    ]
+               <> [(SimpleNgrams     i', (Map.singleton Institutes  1, 1)) | i' <- institutes ]
+               <> [(SimpleNgrams     a', (Map.singleton Authors     1, 1)) | a' <- authors    ]
+               <> [(EnrichedNgrams   t', (Map.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts'     ]

 instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
  where
-    extractNgramsT l (Node _ _ _ _ _ _ _ h) = extractNgramsT l h
+    extractNgramsT l (Node { _node_hyperdata = h }) = extractNgramsT l h

 instance HasText a => HasText (Node a)
  where
-    hasText (Node _ _ _ _ _ _ _ h) = hasText h
+    hasText (Node { _node_hyperdata = h }) = hasText h




--- a/src/Gargantext/Database/Action/Flow/Utils.hs
+++ b/src/Gargantext/Database/Action/Flow/Utils.hs
@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils

 import Data.Map (Map)
 import Data.HashMap.Strict (HashMap)
+import Gargantext.Core.Types (TermsCount)
 import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Prelude (Cmd)
 import Gargantext.Database.Query.Table.ContextNodeNgrams
@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
 data DocumentIdWithNgrams a b =
     DocumentIdWithNgrams
     { documentWithId :: Indexed NodeId a
-     , documentNgrams :: HashMap b (Map NgramsType Int)
+     , documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
     } deriving (Show)

 insertDocNgrams :: ListId
-                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId Int))
+                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
                -> Cmd err Int
-insertDocNgrams lId m = insertContextNodeNgrams ns
+insertDocNgrams lId m = do
+  printDebug "[insertDocNgrams] ns" ns
+  insertContextNodeNgrams ns
  where
    ns = [ ContextNodeNgrams docId lId (ng^.index)
                                   (ngramsTypeId t)
                                   (fromIntegral i)
+                                   cnt
         | (ng, t2n2i) <- HashMap.toList m
         , (t,  n2i)   <- DM.toList t2n2i
-         , (docId,  i)     <- DM.toList n2i
+         , (docId,  (i, cnt))     <- DM.toList n2i
         ]

-
-
-
-
-
+-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
+-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
--- a/src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
+++ b/src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
 -- | Insert utils
 insertContextNodeNgrams :: [ContextNodeNgrams] -> Cmd err Int
 insertContextNodeNgrams = insertContextNodeNgramsW
-                     . map (\(ContextNodeNgrams c n ng nt w) ->
+                     . map (\(ContextNodeNgrams c n ng nt w dc) ->
                              ContextNodeNgrams (pgContextId c)
                                                (pgNodeId n)
                                                (sqlInt4  ng)
                                                (pgNgramsTypeId nt)
                                                (sqlDouble w)
+                                                (sqlInt4 dc)
                            )

 insertContextNodeNgramsW :: [ContextNodeNgramsWrite] -> Cmd err Int

--- a/src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
+++ b/src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
 updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64
 updateHyperdataQuery i h = seq h' $ trace "updateHyperdataQuery: encoded JSON" $ Update
   { uTable      = nodeTable
-   , uUpdateWith = updateEasy (\  (Node _ni _nh _nt _nu _np _nn _nd _h)
-                                -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
+   , uUpdateWith = updateEasy (\  (Node { .. })
+                                -> Node { _node_hyperdata = h', .. }
+                               -- -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
                              )
   , uWhere      = (\row -> {-trace "uWhere" $-} _node_id row .== pgNodeId i )
   , uReturning  = rCount
@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
 updateNodesWithType_ nt h = do
  ns <- getNodesIdWithType nt
  mapM (\n -> updateHyperdata n h) ns
-
-
--- a/src/Gargantext/Database/Schema/ContextNodeNgrams.hs
+++ b/src/Gargantext/Database/Schema/ContextNodeNgrams.hs
@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
  where

 import Prelude
+import Gargantext.Core.Types (TermsCount)
 import Gargantext.Database.Schema.Prelude
 import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId)
 import Gargantext.Database.Admin.Types.Node
@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node


 type ContextNodeNgrams =
-  ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double
+  ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double TermsCount


-data ContextNodeNgramsPoly c n ngrams_id ngt w
+data ContextNodeNgramsPoly c n ngrams_id ngt w dc
   = ContextNodeNgrams { _cnng_context_id   :: !c
                       , _cnng_node_id      :: !n
                       , _cnng_ngrams_id    :: !ngrams_id
                       , _cnng_ngramsType   :: !ngt
                       , _cnng_weight       :: !w
+                       , _cnng_doc_count    :: !dc
                       } deriving (Show)

 type ContextNodeNgramsWrite =
@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
                           (Column SqlInt4  )
                           (Column SqlInt4  )
                           (Column SqlFloat8)
+                           (Column SqlInt4  )

 type ContextNodeNgramsRead  =
     ContextNodeNgramsPoly (Column SqlInt4  )
@@ -50,6 +53,7 @@ type ContextNodeNgramsRead  =
                           (Column SqlInt4  )
                           (Column SqlInt4  )
                           (Column SqlFloat8)
+                           (Column SqlInt4  )

 type ContextNodeNgramsReadNull =
     ContextNodeNgramsPoly (Column (Nullable SqlInt4  ))
@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
                           (Column (Nullable SqlInt4  ))
                           (Column (Nullable SqlInt4  ))
                           (Column (Nullable SqlFloat8))
+                           (Column (Nullable SqlInt4  ))

 $(makeAdaptorAndInstance "pContextNodeNgrams" ''ContextNodeNgramsPoly)
 makeLenses ''ContextNodeNgramsPoly
@@ -70,5 +75,6 @@ contextNodeNgramsTable  = Table "context_node_ngrams"
                               , _cnng_ngrams_id  = requiredTableField "ngrams_id"
                               , _cnng_ngramsType = requiredTableField "ngrams_type"
                               , _cnng_weight     = requiredTableField "weight"
+                               , _cnng_doc_count  = requiredTableField "doc_count"
                               }
                          )
--- a/src/Gargantext/Database/Schema/NodeNgrams.hs
+++ b/src/Gargantext/Database/Schema/NodeNgrams.hs
 {-|
 Module      : Gargantext.Database.Schema.NodeNgrams
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -86,7 +86,7 @@ type NgramsClass  = Int
 type NgramsText   = Text

 -- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text 
+-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text

 type NodeNgramsW =
  NodeNgramsPoly (Maybe NodeNgramsId) NodeId ListType NgramsText