Merge branch 'dev' into 151-dev-pubmed-api-key

dc52c519 · Przemyslaw Kaminski · c38275d0 · 93e711b1 · dc52c519 · dc52c519
Commit dc52c519 authored Nov 28, 2022 by Przemyslaw Kaminski
43 changed files
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
+## Version 0.0.6.8.5.1
+* [BACK][FIX] Indexing issue: taking all terms instead of longest of terms in case of ngrams included in others
+* [FRONT][FIX][Disconnection of instance causes a blank page (#464)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/464)
+* [BACK][FIX] ArXiv search in Abstracts by default
 ## Version 0.0.6.8.5
 * [BACK][FIX][Ngrams Table, page sort / limit (#149)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/149)
 * [FRONT][FIX][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
@@ -9,6 +15,7 @@
 ## Version 0.0.6.8.3
+* [INFO][UPGRADE] To upgrade use the 0.0.6.8.3.sql script
 * [FRONT][SECU][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
 * [FRONT][FEAT][Graph Explorer: disable controls when ForceAtlas is running (#451)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/451)
 * [FRONT][FIX][[Doc date filter] Filter by multiple dates (#450)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/450)

--- a/README.md
+++ b/README.md
@@ -196,6 +196,8 @@ To build documentation, run:
 stack --docker build --haddock --no-haddock-deps --fast
 ```
+(in `.stack-work/dist/x86_64-linux-nix/Cabal-3.2.1.0/doc/html/gargantext`).
 ## GraphQL
 Some introspection information.

--- a/bin/gargantext-upgrade/Main.hs
+++ b/bin/gargantext-upgrade/Main.hs
@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
 import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
 import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
-import Gargantext.Database.Query.Table.Node (getOrMkList)
 import Gargantext.Prelude
 import Gargantext.Prelude.Config (GargConfig(..), readConfig)
 import Prelude (getLine)
@@ -67,7 +66,7 @@ main = do
  _ok  <- getLine
  cfg       <- readConfig         iniPath
-  let secret = _gc_secretkey cfg
+  let _secret = _gc_secretkey cfg
  withDevEnv iniPath $ \env -> do
    -- First upgrade the Database Schema

--- a/devops/docker/docker-compose.yaml
+++ b/devops/docker/docker-compose.yaml
@@ -21,10 +21,11 @@ services:
  postgres:
    #image: 'postgres:latest'
    image: 'postgres:14'
+    shm_size: 1g  # https://stackoverflow.com/a/56754077
    network_mode: host
    #command: ["postgres", "-c", "log_statement=all"]
    #ports:
-      #- 5432:5432
+    #  - 5432:5432
    environment:
      POSTGRES_USER: gargantua
      POSTGRES_PASSWORD: C8kdcUrAQy66U

--- a/devops/postgres/schema.sql
+++ b/devops/postgres/schema.sql
@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
    ngrams_id     INTEGER NOT NULL REFERENCES public.ngrams   (id) ON DELETE CASCADE,
    ngrams_type   INTEGER  ,
    weight double precision,
+    doc_count     INTEGER DEFAULT 0,
    PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
  );
 ALTER TABLE public.context_node_ngrams OWNER TO gargantua;

--- a/devops/postgres/upgrade/0.0.6.8.3.sql
+++ b/devops/postgres/upgrade/0.0.6.8.3.sql
+ALTER TABLE context_node_ngrams
+  ADD COLUMN doc_count INTEGER DEFAULT 0;
--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -5,7 +5,7 @@ cabal-version: 1.12
 -- see: https://github.com/sol/hpack
 name:           gargantext
-version:        0.0.6.8.5
+version:        0.0.6.8.5.1
 synopsis:       Search, map, share
 description:    Please see README.md
 category:       Data
@@ -50,6 +50,7 @@ library
      Gargantext.Core.Types
      Gargantext.Core.Types.Individu
      Gargantext.Core.Types.Main
+      Gargantext.Core.Utils
      Gargantext.Core.Utils.Prefix
      Gargantext.Utils.Jobs
      Gargantext.Utils.Jobs.API
@@ -87,6 +88,7 @@ library
      Gargantext.Core.Text.Prepare
      Gargantext.Core.Text.Search
      Gargantext.Core.Text.Terms
+      Gargantext.Core.Text.Terms.Eleve
      Gargantext.Core.Text.Terms.Mono
      Gargantext.Core.Text.Terms.Multi.Lang.En
      Gargantext.Core.Text.Terms.Multi.Lang.Fr
@@ -211,7 +213,6 @@ library
      Gargantext.Core.Text.Samples.EN
      Gargantext.Core.Text.Samples.FR
      Gargantext.Core.Text.Samples.SP
-      Gargantext.Core.Text.Terms.Eleve
      Gargantext.Core.Text.Terms.Mono.Stem
      Gargantext.Core.Text.Terms.Mono.Stem.En
      Gargantext.Core.Text.Terms.Mono.Token
@@ -221,7 +222,6 @@ library
      Gargantext.Core.Text.Terms.Multi.PosTagging
      Gargantext.Core.Text.Terms.Multi.PosTagging.Types
      Gargantext.Core.Types.Phylo
-      Gargantext.Core.Utils
      Gargantext.Core.Utils.DateUtils
      Gargantext.Core.Viz
      Gargantext.Core.Viz.Chart
@@ -809,6 +809,7 @@ test-suite garg-test
      Core.Text
      Core.Text.Examples
      Core.Text.Flow
+      Core.Utils
      Graph.Clustering
      Graph.Distance
      Ngrams.Lang

--- a/package.yaml
+++ b/package.yaml
@@ -6,7 +6,7 @@ name: gargantext
 #         | | | +-----  Layers * : New versions with API additions
 #         | | | | +---  Layers * : New versions without API breaking changes
 #         | | | | |
-version: '0.0.6.8.5'
+version: '0.0.6.8.5.1'
 synopsis: Search, map, share
 description: Please see README.md
 category: Data
@@ -74,6 +74,7 @@ library:
  - Gargantext.Core.Types
  - Gargantext.Core.Types.Individu
  - Gargantext.Core.Types.Main
+  - Gargantext.Core.Utils
  - Gargantext.Core.Utils.Prefix
  - Gargantext.Utils.Jobs
  - Gargantext.Utils.Jobs.API
@@ -111,6 +112,7 @@ library:
  - Gargantext.Core.Text.Prepare
  - Gargantext.Core.Text.Search
  - Gargantext.Core.Text.Terms
+  - Gargantext.Core.Text.Terms.Eleve
  - Gargantext.Core.Text.Terms.Mono
  - Gargantext.Core.Text.Terms.Multi.Lang.En
  - Gargantext.Core.Text.Terms.Multi.Lang.Fr

--- a/src-test/Core/Text.hs
+++ b/src-test/Core/Text.hs
 {-|
 Module      : Graph.Clustering
 Description : Basic tests to avoid quick regression

--- a/src-test/Core/Utils.hs
+++ b/src-test/Core/Utils.hs
+{-|
+Module      : Core.Utils
+Description :
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+-}
+module Core.Utils where
+import Test.Hspec
+import Gargantext.Prelude
+import Gargantext.Core.Utils
+-- | Core.Utils tests
+test :: IO ()
+test = hspec $ do
+  describe "check if groupWithCounts works" $ do
+    it "simple integer array" $ do
+      (groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)]
+    it "string" $ do
+      (groupWithCounts "abccba") `shouldBe` [('a', 2), ('b', 2), ('c', 2)]
--- a/src-test/Graph/Clustering.hs
+++ b/src-test/Graph/Clustering.hs
 {-|
 Module      : Graph.Clustering
 Description : Basic tests to avoid quick regression
@@ -40,4 +39,3 @@ test = hspec $ do
      let
        result = List.length partitions > 1
      shouldBe True result
--- a/src-test/Main.hs
+++ b/src-test/Main.hs
@@ -11,6 +11,8 @@ Portability : POSIX
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
+import qualified Core.Utils as Utils
 --import qualified Ngrams.Lang.Fr as Fr
 --import qualified Ngrams.Lang as Lang
 import qualified Ngrams.Lang.Occurrences as Occ
@@ -22,11 +24,12 @@ import qualified Utils.Crypto            as Crypto
 main :: IO ()
 main = do
+  Utils.test
 --    Occ.parsersTest
 --    Lang.ngramsExtractionTest FR
 --    Lang.ngramsExtractionTest EN
 --    Metrics.main
-  Graph.test 
+  Graph.test
  PD.testFromRFC3339
 --    GD.test
  Crypto.test
--- a/src-test/Utils/Crypto.hs
+++ b/src-test/Utils/Crypto.hs
 {-|
 Module      : Utils.Crypto
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -43,4 +42,3 @@ test = hspec $ do
    let hash2 = hash (["b","a"] :: [Text])
    it "compare" $ do
      hash1 `shouldBe` hash2
--- a/src/Gargantext/API/Ngrams.hs
+++ b/src/Gargantext/API/Ngrams.hs
@@ -187,9 +187,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
 saveNodeStory = do
  saver <- view hasNodeStorySaver
  liftBase $ do
-    Gargantext.Prelude.putStrLn "---- Running node story saver ----"
+    --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
    saver
-    Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
+    --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
 saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
@@ -197,9 +197,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
 saveNodeStoryImmediate = do
  saver <- view hasNodeStoryImmediateSaver
  liftBase $ do
-    Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
+    --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
    saver
-    Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
+    --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
 listTypeConflictResolution :: ListType -> ListType -> ListType

--- a/src/Gargantext/API/Ngrams/List.hs
+++ b/src/Gargantext/API/Ngrams/List.hs
@@ -130,26 +130,23 @@ reIndexWith :: ( HasNodeStory env err m
            -> Set ListType
            -> m ()
 reIndexWith cId lId nt lts = do
+  printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
  -- Getting [NgramsTerm]
  ts <- List.concat
     <$> map (\(k,vs) -> k:vs)
     <$> HashMap.toList
     <$> getTermsWith identity [lId] nt lts
-  -- printDebug "ts" ts
-  -- Taking the ngrams with 0 occurrences only (orphans)
-  -- occs <- getOccByNgramsOnlyFast' cId lId nt ts
-  -- printDebug "occs" occs
  let orphans = ts {- List.concat
              $ map (\t -> case HashMap.lookup t occs of
                       Nothing -> [t]
                       Just n  -> if n <= 1 then [t] else [ ]
                       ) ts
-        -}
+                   -}
-  -- printDebug "orphans" orphans
+  printDebug "orphans" orphans
  -- Get all documents of the corpus
  docs <- selectDocNodes cId
@@ -158,24 +155,25 @@ reIndexWith cId lId nt lts = do
  -- Checking Text documents where orphans match
  -- TODO Tests here
  let
-    ngramsByDoc = map (HashMap.fromListWith (<>))
+    -- fromListWith (<>)
-                $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
+    ngramsByDoc = map (HashMap.fromList)
-                $ map (\doc -> List.zip
+                  $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
-                                (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
+                  $ map (\doc -> List.zip
-                                             $ Text.unlines $ catMaybes
+                                 (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
-                                               [ doc ^. context_hyperdata . hd_title
+                                  $ Text.unlines $ catMaybes
-                                               , doc ^. context_hyperdata . hd_abstract
+                                  [ doc ^. context_hyperdata . hd_title
-                                               ]
+                                  , doc ^. context_hyperdata . hd_abstract
+                                  ]
                                 )
-                                (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
+                                 (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
                        ) docs
-  -- printDebug "ngramsByDoc" ngramsByDoc
+  printDebug "ngramsByDoc: " ngramsByDoc
  -- Saving the indexation in database
  _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
-  pure () -- ngramsByDoc
+  pure ()
 toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
 toIndexedNgrams m t = Indexed <$> i <*> n

--- a/src/Gargantext/API/Ngrams/Tools.hs
+++ b/src/Gargantext/API/Ngrams/Tools.hs
@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m =
                                listToCombi identity ks
              ]
-  where 
+  where
    ks = HM.keys m
 -- TODO k could be either k1 or k2 here
@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
                                -- TODO check optim
                                -- listToCombi identity ks1
              ]
-  where 
+  where
    ks1 = HM.keys m1
    ks2 = HM.keys m2

--- a/src/Gargantext/API/Prelude.hs
+++ b/src/Gargantext/API/Prelude.hs
@@ -29,6 +29,7 @@ import Control.Monad.Reader (ReaderT)
 import Control.Monad.Error.Class (MonadError(..))
 import Crypto.JOSE.Error as Jose
 import Data.Aeson.Types
+import qualified Data.Text as Text
 import Data.Typeable
 import Data.Validity
 import Gargantext.API.Admin.Orchestrator.Types
@@ -115,7 +116,7 @@ data GargError
 makePrisms ''GargError
 instance ToJSON GargError where
-  toJSON _ = String "SomeGargErrorPleaseReport"
+  toJSON err = object [("error", String $ Text.pack $ show err)]
 instance Exception GargError

--- a/src/Gargantext/API/Routes.hs
+++ b/src/Gargantext/API/Routes.hs
@@ -289,7 +289,7 @@ addCorpusWithForm user cid =
  serveJobsAPI AddCorpusFormJob $ \i log' ->
      let
        log'' x = do
-          printDebug "[addToCorpusWithForm] " x
+          --printDebug "[addToCorpusWithForm] " x
          liftBase $ log' x
      in New.addToCorpusWithForm user cid i log'' (jobLogInit 3)

--- a/src/Gargantext/API/Search.hs
+++ b/src/Gargantext/API/Search.hs
@@ -18,6 +18,7 @@ module Gargantext.API.Search
      where
 import Data.Aeson hiding (defaultTaggedObject)
+-- import Data.List (concat)
 import Data.Maybe (fromMaybe)
 import Data.Swagger hiding (fieldLabelModifier, Contact)
 import Data.Text (Text)
@@ -56,6 +57,7 @@ api nId (SearchQuery q SearchDoc) o l order =
  SearchResult <$> SearchResultDoc
               <$> map (toRow nId)
               <$> searchInCorpus nId False q o l order
+               -- <$> searchInCorpus nId False (concat q) o l order
 api nId (SearchQuery q SearchContact) o l order = do
  printDebug "isPairedWith" nId
@@ -69,11 +71,13 @@ api nId (SearchQuery q SearchContact) o l order = do
            <$> map (toRow aId)
            <$> searchInCorpusWithContacts nId aId q o l order
+api _nId (SearchQuery _q SearchDocWithNgrams) _o _l _order = undefined
 -----------------------------------------------------------------------
 -----------------------------------------------------------------------
 -- | Main Types
 -----------------------------------------------------------------------
-data SearchType = SearchDoc | SearchContact
+data SearchType = SearchDoc | SearchContact | SearchDocWithNgrams
  deriving (Generic)
 instance FromJSON SearchType where
  parseJSON = genericParseJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
@@ -132,7 +136,7 @@ instance FromJSON SearchResultTypes where
  parseJSON = genericParseJSON (defaultOptions { sumEncoding = defaultTaggedObject })
 instance ToJSON SearchResultTypes where
  toJSON = genericToJSON (defaultOptions { sumEncoding = defaultTaggedObject })
 instance Arbitrary SearchResultTypes where
  arbitrary = do
    srd <- SearchResultDoc     <$> arbitrary
@@ -163,7 +167,7 @@ data Row =
  deriving (Generic)
 instance FromJSON  Row
  where
-    parseJSON = genericParseJSON 
+    parseJSON = genericParseJSON
                 ( defaultOptions { sumEncoding = defaultTaggedObject } )
 instance ToJSON  Row
  where

--- a/src/Gargantext/Core/NodeStory.hs
+++ b/src/Gargantext/Core/NodeStory.hs
@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do
  where
    query :: PGS.Query
    query = [sql| WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?)
-                  INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element) 
+                  INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
                  SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id
            |]
@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
  --printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates
  -- 2. Perform inserts/deletes/updates
-  printDebug "[updateNodeStory] applying insert" ()
+  --printDebug "[updateNodeStory] applying insert" ()
  insertArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList inserts }
-  printDebug "[updateNodeStory] insert applied" ()
+  --printDebug "[updateNodeStory] insert applied" ()
    --TODO Use currentArchive ^. a_version in delete and report error
  -- if entries with (node_id, ngrams_type_id, ngrams_id) but
  -- different version are found.
  deleteArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList deletes }
-  printDebug "[updateNodeStory] delete applied" ()
+  --printDebug "[updateNodeStory] delete applied" ()
  updateArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
                                       , _a_history = []
                                       , _a_state = archiveStateFromList updates }
-  printDebug "[updateNodeStory] update applied" ()
+  --printDebug "[updateNodeStory] update applied" ()
  pure ()
  -- where

--- a/src/Gargantext/Core/Text/Terms.hs
+++ b/src/Gargantext/Core/Text/Terms.hs
@@ -28,8 +28,9 @@ compute graph
 -}
-{-# LANGUAGE TemplateHaskell   #-}
 {-# LANGUAGE ConstrainedClassMethods #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TemplateHaskell   #-}
 module Gargantext.Core.Text.Terms
  where
@@ -47,6 +48,7 @@ import qualified Data.Set            as Set
 import qualified Data.Text           as Text
 import qualified Data.HashMap.Strict as HashMap
 import Gargantext.Core
+import Gargantext.Core.Utils (groupWithCounts)
 import Gargantext.Core.Text (sentences, HasText(..))
 import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
 import Gargantext.Core.Text.Terms.Mono  (monoTerms)
@@ -70,24 +72,23 @@ data TermType lang
                 , _tt_model      :: !(Maybe (Tries Token ()))
                 }
  deriving (Generic)
+deriving instance (Show lang) => Show (TermType lang)
 makeLenses ''TermType
 --group :: [Text] -> [Text]
 --group = undefined
 -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ 
+-- map (filter (\t -> not . elem t)) $
 ------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user).
+-- | Sugar to extract terms from text (hidding 'mapM' from end user).
 --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
-extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
+extractTerms :: TermType Lang -> [Text] -> IO [[TermsWithCount]]
 extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
  where
    m' = case _tt_model of
      Just m''-> m''
      Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
 extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams   { unSimpleNgrams   :: Ngrams       }
 instance Hashable ExtractedNgrams
+-- | A typeclass that represents extracting ngrams from an entity.
 class ExtractNgramsT h
  where
    extractNgramsT :: HasText h
                   => TermType Lang
                   -> h
-                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
 ------------------------------------------------------------------------
 enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
 enrichedTerms l pa po (Terms ng1 ng2) =
@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) =
 ------------------------------------------------------------------------
 cleanNgrams :: Int -> Ngrams -> Ngrams
-cleanNgrams s ng 
+cleanNgrams s ng
      | Text.length (ng ^. ngramsTerms) < s = ng
      | otherwise                           = text2ngrams (Text.take s (ng ^. ngramsTerms))
@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do
  let (s, e) = List.partition isSimpleNgrams ngs
  m1 <- insertNgrams       (map unSimpleNgrams   s)
  --printDebug "others" m1
  m2 <- insertNgramsPostag (map unEnrichedNgrams e)
  --printDebug "terms" m2
  let result = HashMap.union m1 m2
  pure result
@@ -163,43 +165,41 @@ isSimpleNgrams (SimpleNgrams _) = True
 isSimpleNgrams _                = False
 ------------------------------------------------------------------------
-- | Terms from Text
+-- | Terms from 'Text'
-- Mono : mono terms
+-- 'Mono' : mono terms
-- Multi : multi terms
+-- 'Multi' : multi terms
-- MonoMulti : mono and multi
+-- 'MonoMulti' : mono and multi
 -- TODO : multi terms should exclude mono (intersection is not empty yet)
-terms :: TermType Lang -> Text -> IO [Terms]
+terms :: TermType Lang -> Text -> IO [TermsWithCount]
 terms (Mono      lang) txt = pure $ monoTerms lang txt
 terms (Multi     lang) txt = multiterms lang txt
 terms (MonoMulti lang) txt = terms (Multi lang) txt
-terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
+terms (Unsupervised { .. }) txt = pure $ termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
  where
    m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
 -- terms (WithList  list) txt = pure . concat $ extractTermsWithList list txt
 ------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
 type WindowSize = Int
 type MinNgramSize = Int
-termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
+-- | Unsupervised ngrams extraction
-termsUnsupervised (Unsupervised l n s m) =
+-- language agnostic extraction
-               pure
+-- TODO: newtype BlockText
-             . map (text2term l)
+termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount]
-             . List.nub
+termsUnsupervised (Unsupervised { _tt_model = Nothing }) = panic "[termsUnsupervised] no model"
-             . (List.filter (\l' -> List.length l' >= s))
+termsUnsupervised (Unsupervised { _tt_model = Just _tt_model, .. }) =
+               map (\(t, cnt) -> (text2term _tt_lang t, cnt))
+             . groupWithCounts
+             -- . List.nub
+             . (List.filter (\l' -> List.length l' >= _tt_windowSize))
             . List.concat
-             . mainEleveWith (maybe (panic "no model") identity m) n
+             . mainEleveWith _tt_model _tt_ngramsSize
             . uniText
 termsUnsupervised _ = undefined
 newTries :: Int -> Text -> Tries Token ()
 newTries n t = buildTries n (fmap toToken $ uniText t)
@@ -217,5 +217,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
 isPunctuation :: Text -> Bool
 isPunctuation x = List.elem x $  (Text.pack . pure)
                             <$> ("!?(),;.:" :: String)
--- a/src/Gargantext/Core/Text/Terms/Eleve.hs
+++ b/src/Gargantext/Core/Text/Terms/Eleve.hs
@@ -32,6 +32,7 @@ Notes for current implementation:
 -}
 {-# LANGUAGE ConstraintKinds   #-}
+{-# LANGUAGE StandaloneDeriving   #-}
 {-# LANGUAGE TemplateHaskell   #-}
 {-# LANGUAGE TypeFamilies      #-}
@@ -278,6 +279,8 @@ data Tries k e = Tries
 makeLenses ''Tries
+deriving instance (Show k, Show e) => Show (Tries k e)
 buildTries :: Int -> [[Token]] -> Tries Token ()
 buildTries n sentences = Tries
  { _fwd = buildTrie Forward  n sentences

--- a/src/Gargantext/Core/Text/Terms/Mono.hs
+++ b/src/Gargantext/Core/Text/Terms/Mono.hs
@@ -40,8 +40,8 @@ words = monoTexts
 isSep :: Char -> Bool
 isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
-monoTerms :: Lang -> Text -> [Terms]
+monoTerms :: Lang -> Text -> [TermsWithCount]
-monoTerms l txt = map (monoText2term l) $ monoTexts txt
+monoTerms l txt = map (\t -> (monoText2term l t, 1)) $ monoTexts txt
 monoTexts :: Text -> [Text]
 monoTexts = L.concat . monoTextsBySentence

--- a/src/Gargantext/Core/Text/Terms/Multi.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi.hs
@@ -21,6 +21,7 @@ import Data.List (concat)
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Types
+import Gargantext.Core.Utils (groupWithCounts)
 import Gargantext.Core.Text.Terms.Multi.PosTagging
 import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
 type NLP_API = Lang -> Text -> IO PosSentences
 -------------------------------------------------------------------
-multiterms :: Lang -> Text -> IO [Terms]
+multiterms :: Lang -> Text -> IO [TermsWithCount]
-multiterms = multiterms' tokenTag2terms
+multiterms l txt = do
+  ret <- multiterms' tokenTag2terms l txt
+  pure $ groupWithCounts ret
  where
    multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
-    multiterms' f lang txt = concat
+    multiterms' f lang txt' = concat
                       <$> map (map f)
                       <$> map (filter (\t -> _my_token_pos t == Just NP))
-                       <$> tokenTags lang txt
+                       <$> tokenTags lang txt'
 -------------------------------------------------------------------
 tokenTag2terms :: TokenTag -> Terms
@@ -57,10 +60,10 @@ tokenTags l  _   = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <
 tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
 tokenTagsWith lang txt nlp = map (groupTokens lang)
-                          <$> map tokens2tokensTags
+                         <$> map tokens2tokensTags
-                          <$> map _sentenceTokens
+                         <$> map _sentenceTokens
-                          <$> _sentences
+                         <$> _sentences
-                          <$> nlp lang txt
+                         <$> nlp lang txt
 ---- | This function analyses and groups (or not) ngrams according to

--- a/src/Gargantext/Core/Text/Terms/Multi/Group.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/Group.hs
@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
 group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
 group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
 group2 _ _ [] = []
--- a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
 --groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs)         = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
 --
 --groupNgrams (x:xs)                                          = (x:(groupNgrams xs))
--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag]
 tokens2tokensTags ts = filter' $ map tokenTag ts
 ------------------------------------------------------------------------
 tokenTag :: Token -> TokenTag
-tokenTag (Token _ w _ l _ _ p n _ _) = TokenTag w' l' p n
+tokenTag (Token { .. }) = TokenTag { _my_token_word = w'
+                                   , _my_token_lemma = l'
+                                   , _my_token_pos = _tokenPos
+                                   , _my_token_ner = _tokenNer }
  where
-    w' = split w
+    w' = split _tokenWord
-    l' = fromList (split l)
+    l' = fromList (split _tokenLemma)
    split = splitOn (pack " ") . toLower
 filter' :: [TokenTag] -> [TokenTag]
 filter' xs = filter isNgrams xs
    where
-      isNgrams (TokenTag _ _ p n) = isJust p || isJust n
+      isNgrams (TokenTag { .. }) = isJust _my_token_pos || isJust _my_token_ner
 ------------------------------------------------------------------------
-- request = 
+-- request =
 -- "fr" : {
 --                 "tokenize.language" : "fr",
 --                 "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs
 --                 "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
 --                 "depparse.language" : "french",
 --                 "ner.model":  DATA_ROOT+"/eunews.fr.crf.gz",
--                 "ssplit.newlineIsSentenceBreak": "always" 
+--                 "ssplit.newlineIsSentenceBreak": "always"
 --             },
-- 
+--
 corenlp' :: ( FromJSON a
            , ConvertibleStrings p ByteString
@@ -80,6 +83,7 @@ corenlp' lang txt = do
            FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
            _  -> panic $ pack "not implemented yet"
    url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
+    -- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
    let request = setRequestBodyLBS (cs txt) url
    httpJSON request

--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
  deriving (Show, Generic)
 $(deriveJSON (unPrefix "_") ''PosSentences)
--- a/src/Gargantext/Core/Text/Terms/WithList.hs
+++ b/src/Gargantext/Core/Text/Terms/WithList.hs
 {-|
 Module      : Gargantext.Core.Text.Terms.WithList
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
 import Gargantext.Prelude
 import Gargantext.Core.Text.Context
 import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
+import Gargantext.Core.Types (TermsCount)
+import Gargantext.Core.Utils (groupWithCounts)
 import Prelude (error)
 import qualified Data.Algorithms.KMP as KMP
 import qualified Data.IntMap.Strict  as IntMap
@@ -35,8 +37,11 @@ data Pattern = Pattern
 type Patterns = [Pattern]
 ------------------------------------------------------------------------
-replaceTerms :: Patterns -> [Text] -> [[Text]]
-replaceTerms pats terms = go 0
+data ReplaceTerms = KeepAll | LongestOnly
+replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]]
+replaceTerms rplaceTerms pats terms = go 0
  where
    terms_len = length terms
@@ -47,15 +52,17 @@ replaceTerms pats terms = go 0
        Just (len, term) ->
          term : go (ix + len)
+    m = toMap
-    merge (len1, lab1) (len2, lab2) =
-      if len2 < len1 then (len1, lab1) else (len2, lab2)
-    m =
-      IntMap.fromListWith merge
        [ (ix, (len, term))
        | Pattern pat len term <- pats, ix <- KMP.match pat terms ]
+    toMap = case rplaceTerms of
+      KeepAll -> IntMap.fromList
+      LongestOnly -> IntMap.fromListWith merge
+        where
+          merge (len1, lab1) (len2, lab2) =
+            if len2 < len1 then (len1, lab1) else (len2, lab2)
 buildPatterns :: TermList -> Patterns
 buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
  where
@@ -71,8 +78,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
 --------------------------------------------------------------------------
 -- Utils
 type MatchedText = Text
-termsInText :: Patterns -> Text -> [MatchedText]
+termsInText :: Patterns -> Text -> [(MatchedText, TermsCount)]
-termsInText pats txt = List.nub
+termsInText pats txt = groupWithCounts
                     $ List.concat
                     $ map (map unwords)
                     $ extractTermsWithList pats txt
@@ -80,14 +87,14 @@ termsInText pats txt = List.nub
 --------------------------------------------------------------------------
 extractTermsWithList :: Patterns -> Text -> Corpus [Text]
-extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence
+extractTermsWithList pats = map (replaceTerms KeepAll pats) . monoTextsBySentence
 -- | Extract terms
 -- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
 -- extractTermsWithList' (buildPatterns termList) "Le chat blanc"["chat blanc"]
 -- ["chat blanc"]
 extractTermsWithList' :: Patterns -> Text -> [Text]
-extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
+extractTermsWithList' pats = map (concat . map concat . replaceTerms KeepAll pats)
                           . monoTextsBySentence
 --------------------------------------------------------------------------
@@ -95,7 +102,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
 {- | Not used
 filterWith :: TermList
           -> (a -> Text)
-           -> [a] 
+           -> [a]
           -> [(a, [Text])]
 filterWith termList f xs = filterWith' termList f zip xs
@@ -103,7 +110,7 @@ filterWith termList f xs = filterWith' termList f zip xs
 filterWith' :: TermList
           -> (a -> Text)
           -> ([a] -> [[Text]] -> [b])
-           -> [a] 
+           -> [a]
           -> [b]
 filterWith' termList f f' xs = f' xs
                            $ map (extractTermsWithList' pats)

--- a/src/Gargantext/Core/Types.hs
+++ b/src/Gargantext/Core/Types.hs
 {-|
 Module      : Gargantext.Types
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -17,7 +17,7 @@ commentary with @some markup@.
 module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
                             , module Gargantext.Database.Admin.Types.Node
                             , DebugMode(..), withDebugMode
-                             , Term, Terms(..)
+                             , Term, Terms(..), TermsCount, TermsWithCount
                             , TokenTag(..), POS(..), NER(..)
                             , Label, Stems
                             , HasInvalidError(..), assertValid
@@ -70,10 +70,13 @@ type Label = [Text]
 data Terms = Terms { _terms_label :: Label
                   , _terms_stem  :: Stems
                   } deriving (Ord, Show)
 instance Eq Terms where
  (==) (Terms _ s1) (Terms _ s2) = s1 == s2
+type TermsCount = Int
+type TermsWithCount = (Terms, TermsCount)
 ------------------------------------------------------------------------
 data Tag = POS | NER
  deriving (Show, Eq)
@@ -208,5 +211,3 @@ data TODO = TODO
 instance ToSchema TODO where
 instance ToParamSchema TODO where
 ----------------------------------------------------------------------------
--- a/src/Gargantext/Core/Utils.hs
+++ b/src/Gargantext/Core/Utils.hs
 {-|
 Module      : Gargantext.Utils
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -12,16 +12,19 @@ commentary with @some markup@.
 -}
-module Gargantext.Core.Utils ( 
+module Gargantext.Core.Utils (
                           -- module Gargantext.Utils.Chronos
                             module Gargantext.Core.Utils.Prefix
                           , something
                           , alphanum
                           , choices
                           , randomString
+                           , groupWithCounts
+                           , addTuples
                          ) where
 import Data.Char (chr, ord)
+import qualified Data.List as List
 import Data.Maybe
 import Data.Monoid
 import Data.Text (Text, pack)
@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
 randomString num = do
  str <- choices num alphanum
  pure $ pack str
+-- | Given a list of items of type 'a', return list with unique items
+-- (like List.nub) but tuple-d with their counts in the original list
+groupWithCounts :: (Ord a, Eq a) => [a] -> [(a, Int)]
+groupWithCounts = map f
+                . List.group
+                . List.sort
+  where
+    f [] = panic "[groupWithCounts] impossible"
+    f ts@(t:_) = (t, length ts)
+addTuples :: (Num a, Num b) => (a, b) -> (a, b) -> (a, b)
+addTuples (a1, b1) (a2, b2) = (a1 + a2, b1 + b2)
--- a/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+++ b/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
@@ -65,7 +65,7 @@ flowPhylo cId = do
    patterns = buildPatterns termList
    -- | To filter the Ngrams of a document based on the termList
    filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
-    filterTerms patterns' (y,d) = (y,termsInText patterns' d)
+    filterTerms patterns' (y,d) = (y, fst <$> termsInText patterns' d)
    docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
 -- refactor 2021
 -- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
 -- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
--- a/src/Gargantext/Database/Action/Flow.hs
+++ b/src/Gargantext/Database/Action/Flow.hs
@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
    where
 import Conduit
-import Control.Lens ((^.), view, _Just, makeLenses)
+import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
 import Control.Monad.Reader (MonadReader)
 import Data.Aeson.TH (deriveJSON)
 import Data.Conduit.Internal (zipSources)
@@ -61,7 +61,6 @@ import Data.Maybe (catMaybes)
 import Data.Monoid
 import Data.Swagger
 import qualified Data.Text as T
-import Data.Traversable (traverse)
 import Data.Tuple.Extra (first, second)
 import GHC.Generics (Generic)
 import Servant.Client (ClientError)
@@ -84,9 +83,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
 import Gargantext.Core.Text.Terms
 import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
-import Gargantext.Core.Types (POS(NP))
+import Gargantext.Core.Types (POS(NP), TermsCount)
 import Gargantext.Core.Types.Individu (User(..))
 import Gargantext.Core.Types.Main
+import Gargantext.Core.Utils (addTuples)
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 import Gargantext.Database.Action.Flow.List
 import Gargantext.Database.Action.Flow.Types
@@ -362,25 +362,27 @@ insertMasterDocs c lang hs  =  do
  -- this will enable global database monitoring
  -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
-  mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
+  mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                <- mapNodeIdNgrams
                <$> documentIdWithNgrams
-                    (extractNgramsT $ withLang lang documentsWithId)
+                      (extractNgramsT $ withLang lang documentsWithId)
-                    documentsWithId
+                      documentsWithId
  lId      <- getOrMkList masterCorpusId masterUserId
+  -- _ <- saveDocNgramsWith lId mapNgramsDocs'
  _ <- saveDocNgramsWith lId mapNgramsDocs'
  -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
  pure ids'
-saveDocNgramsWith :: ( FlowCmdM env err m)
+saveDocNgramsWith :: (FlowCmdM env err m)
                  => ListId
-                  -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
+                  -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
                  -> m ()
 saveDocNgramsWith lId mapNgramsDocs' = do
-  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
+  --printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
-  --printDebug "terms2id" terms2id
+  let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
+  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount
  let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
@@ -397,7 +399,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
                                            <*> Just (fromIntegral w :: Double)
                       | (terms'', mapNgramsTypes)      <- HashMap.toList mapNgramsDocs
                       , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
-                       , (nId, w)                       <- Map.toList mapNodeIdWeight
+                       , (nId, (w, _cnt))                       <- Map.toList mapNodeIdWeight
                       ]
  -- to be removed
@@ -456,14 +458,14 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
 ------------------------------------------------------------------------
 documentIdWithNgrams :: HasNodeError err
                     => (a
-                     -> Cmd err (HashMap b (Map NgramsType Int)))
+                     -> Cmd err (HashMap b (Map NgramsType Int, TermsCount)))
                     -> [Indexed NodeId a]
                     -> Cmd err [DocumentIdWithNgrams a b]
 documentIdWithNgrams f = traverse toDocumentIdWithNgrams
  where
    toDocumentIdWithNgrams d = do
      e <- f $ _unIndex         d
-      pure   $ DocumentIdWithNgrams d e
+      pure $ DocumentIdWithNgrams d e
 -- | TODO check optimization
@@ -471,13 +473,17 @@ mapNodeIdNgrams :: (Ord b, Hashable b)
                => [DocumentIdWithNgrams a b]
                -> HashMap b
                       (Map NgramsType
-                            (Map NodeId Int)
+                            (Map NodeId (Int, TermsCount))
                       )
-mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
+mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith addTuples)) . fmap f
  where
+    -- | NOTE We are somehow multiplying 'TermsCount' here: If the
+    -- same ngrams term has different ngrams types, the 'TermsCount'
+    -- for it (which is the number of times the terms appears in a
+    -- document) is copied over to all its types.
    f :: DocumentIdWithNgrams a b
-      -> HashMap b (Map NgramsType (Map NodeId Int))
+      -> HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
-    f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d
+    f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> Map.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
      where
        nId = _index $ documentWithId d
@@ -488,25 +494,25 @@ instance ExtractNgramsT HyperdataContact
    extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
      where
        extract :: TermType Lang -> HyperdataContact
-                -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
        extract _l hc' = do
          let authors = map text2ngrams
                      $ maybe ["Nothing"] (\a -> [a])
                      $ view (hc_who . _Just . cw_lastName) hc'
-          pure $ HashMap.fromList $ [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
+          pure $ HashMap.fromList $ [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]
 instance ExtractNgramsT HyperdataDocument
  where
    extractNgramsT :: TermType Lang
                   -> HyperdataDocument
-                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                   -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
    extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd
      where
        extractNgramsT' :: TermType Lang
                        -> HyperdataDocument
-                       -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
+                       -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
        extractNgramsT' lang' doc = do
          let source    = text2ngrams
                        $ maybe "Nothing" identity
@@ -520,23 +526,23 @@ instance ExtractNgramsT HyperdataDocument
                         $ maybe ["Nothing"] (T.splitOn ", ")
                         $ _hd_authors doc
-          terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
+          termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang' ^. tt_lang) CoreNLP NP t, cnt))
-                 <$> concat
+                              <$> concat
-                 <$> liftBase (extractTerms lang' $ hasText doc)
+                              <$> liftBase (extractTerms lang' $ hasText doc)
          pure $ HashMap.fromList
-               $  [(SimpleNgrams source, Map.singleton Sources     1)                    ]
+               $  [(SimpleNgrams source, (Map.singleton Sources     1, 1))                    ]
-               <> [(SimpleNgrams     i', Map.singleton Institutes  1) | i' <- institutes ]
+               <> [(SimpleNgrams     i', (Map.singleton Institutes  1, 1)) | i' <- institutes ]
-               <> [(SimpleNgrams     a', Map.singleton Authors     1) | a' <- authors    ]
+               <> [(SimpleNgrams     a', (Map.singleton Authors     1, 1)) | a' <- authors    ]
-               <> [(EnrichedNgrams   t', Map.singleton NgramsTerms 1) | t' <- terms'     ]
+               <> [(EnrichedNgrams   t', (Map.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts'     ]
 instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
  where
-    extractNgramsT l (Node _ _ _ _ _ _ _ h) = extractNgramsT l h
+    extractNgramsT l (Node { _node_hyperdata = h }) = extractNgramsT l h
 instance HasText a => HasText (Node a)
  where
-    hasText (Node _ _ _ _ _ _ _ h) = hasText h
+    hasText (Node { _node_hyperdata = h }) = hasText h

--- a/src/Gargantext/Database/Action/Flow/Utils.hs
+++ b/src/Gargantext/Database/Action/Flow/Utils.hs
@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils
 import Data.Map (Map)
 import Data.HashMap.Strict (HashMap)
+import Gargantext.Core.Types (TermsCount)
 import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Prelude (Cmd)
 import Gargantext.Database.Query.Table.ContextNodeNgrams
@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
 data DocumentIdWithNgrams a b =
     DocumentIdWithNgrams
     { documentWithId :: Indexed NodeId a
-     , documentNgrams :: HashMap b (Map NgramsType Int)
+     , documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
     } deriving (Show)
 insertDocNgrams :: ListId
-                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId Int))
+                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
                -> Cmd err Int
-insertDocNgrams lId m = insertContextNodeNgrams ns
+insertDocNgrams lId m = do
+  printDebug "[insertDocNgrams] ns" ns
+  insertContextNodeNgrams ns
  where
    ns = [ ContextNodeNgrams docId lId (ng^.index)
                                   (ngramsTypeId t)
                                   (fromIntegral i)
+                                   cnt
         | (ng, t2n2i) <- HashMap.toList m
         , (t,  n2i)   <- DM.toList t2n2i
-         , (docId,  i)     <- DM.toList n2i
+         , (docId,  (i, cnt))     <- DM.toList n2i
         ]
+-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
+-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
--- a/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+++ b/src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
@@ -121,6 +121,7 @@ getOccByNgramsOnlyFast cId lId nt = do
           -> Cmd err [(Text, DPST.PGArray Int)]
      run cId' lId' nt' = runPGSQuery query
                ( cId'
+                , cId'
                , lId'
                , ngramsTypeId nt'
                )
@@ -130,7 +131,14 @@ getOccByNgramsOnlyFast cId lId nt = do
              SELECT ng.terms
                 --  , ng.id
                     --, round(nng.weight)
-                     , ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = ngrams_id) AS context_ids
+                     , ARRAY(
+                         SELECT DISTINCT context_node_ngrams.context_id
+                         FROM context_node_ngrams
+                         JOIN nodes_contexts
+                           ON context_node_ngrams.context_id = nodes_contexts.context_id
+                         WHERE ng.id = context_node_ngrams.ngrams_id
+                         AND nodes_contexts.node_id = ?
+                         ) AS context_ids
                 -- , ns.version
                 -- , nng.ngrams_type
                 -- , ns.ngrams_type_id

--- a/src/Gargantext/Database/Action/Search.hs
+++ b/src/Gargantext/Database/Action/Search.hs
@@ -14,7 +14,10 @@ module Gargantext.Database.Action.Search where
 import Control.Arrow (returnA)
 import Control.Lens ((^.))
+import qualified Data.List as List
+import qualified Data.Map as Map
 import Data.Maybe
+import qualified Data.Set as Set
 import Data.Text (Text, unpack, intercalate)
 import Data.Time (UTCTime)
 import Gargantext.Core
@@ -26,8 +29,11 @@ import Gargantext.Database.Query.Filter
 import Gargantext.Database.Query.Join (leftJoin5)
 import Gargantext.Database.Query.Table.Node
 import Gargantext.Database.Query.Table.Context
+import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
 import Gargantext.Database.Query.Table.NodeContext
 import Gargantext.Database.Query.Table.NodeContext_NodeContext
+import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..))
+import Gargantext.Database.Schema.Ngrams (NgramsType(..))
 import Gargantext.Database.Schema.Node
 import Gargantext.Database.Schema.Context
 import Gargantext.Prelude
@@ -43,7 +49,7 @@ searchDocInDatabase :: HasDBid NodeType
                    -> Cmd err [(NodeId, HyperdataDocument)]
 searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
  where
-    -- | Global search query where ParentId is Master Node Corpus Id 
+    -- | Global search query where ParentId is Master Node Corpus Id
    queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb)
    queryDocInDatabase _p q = proc () -> do
        row <- queryNodeSearchTable -< ()
@@ -51,6 +57,74 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
        restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
        returnA  -< (_ns_id row, _ns_hyperdata row)
+------------------------------------------------------------------------
+-- | Search ngrams in documents, ranking them by TF-IDF. We narrow our
+-- search only to map/candidate terms.
+searchInCorpusWithNgrams :: HasDBid NodeType
+               => CorpusId
+               -> ListId
+               -> IsTrash
+               -> NgramsType
+               -> [[Text]]
+               -> Maybe Offset
+               -> Maybe Limit
+               -> Maybe OrderBy
+               -> Cmd err [FacetDoc]
+searchInCorpusWithNgrams _cId _lId _t _ngt _q _o _l _order = undefined
+-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
+-- case only the "TF" part makes sense and so we only compute the
+-- ratio of "number of times our terms appear in given document" and
+-- "number of all terms in document" and return a sorted list of
+-- document ids
+tfidfAll :: CorpusId -> [Int] -> Cmd err [Int]
+tfidfAll cId ngramIds = do
+  let ngramIdsSet = Set.fromList ngramIds
+  docsWithNgrams <- runOpaQuery (queryCorpusWithNgrams cId ngramIds) :: Cmd err [(Int, Int, Int)]
+  -- NOTE The query returned docs with ANY ngramIds. We need to further
+  -- restrict to ALL ngramIds.
+  let docsNgramsM =
+        Map.fromListWith (Set.union)
+            [ (ctxId, Set.singleton ngrams_id)
+            | (ctxId, ngrams_id, _) <- docsWithNgrams]
+  let docsWithAllNgramsS = Set.fromList $ List.map fst $
+        List.filter (\(_, docNgrams) ->
+                        ngramIdsSet == Set.intersection ngramIdsSet docNgrams) $ Map.toList docsNgramsM
+  let docsWithAllNgrams =
+        List.filter (\(ctxId, _, _) ->
+                       Set.member ctxId docsWithAllNgramsS) docsWithNgrams
+  printDebug "[tfidfAll] docsWithAllNgrams" docsWithAllNgrams
+  let docsWithCounts = Map.fromListWith (+) [ (ctxId, doc_count)
+                                            | (ctxId, _, doc_count) <- docsWithAllNgrams]
+  printDebug "[tfidfAll] docsWithCounts" docsWithCounts
+  let totals = [ ( ctxId
+                 , ngrams_id
+                 , fromIntegral doc_count :: Double
+                 , fromIntegral (fromMaybe 0 $ Map.lookup ctxId docsWithCounts) :: Double)
+               | (ctxId, ngrams_id, doc_count) <- docsWithAllNgrams]
+  let tfidf_sorted = List.sortOn snd [(ctxId, doc_count/s)
+                                     | (ctxId, _, doc_count, s) <- totals]
+  pure $ List.map fst $ List.reverse tfidf_sorted
+-- | Query for searching the 'context_node_ngrams' table so that we
+-- find docs with ANY given 'ngramIds'.
+queryCorpusWithNgrams :: CorpusId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4)
+queryCorpusWithNgrams cId ngramIds = proc () -> do
+  row <- queryContextNodeNgramsTable -< ()
+  restrict -< (_cnng_node_id row) .== (pgNodeId cId)
+  restrict -< in_ (sqlInt4 <$> ngramIds) (_cnng_ngrams_id row)
+  returnA -< ( _cnng_context_id row
+             , _cnng_ngrams_id row
+             , _cnng_doc_count row)
+  --returnA -< row
+  -- returnA -< ( _cnng_context_id row
+  --            , _cnng_node_id row
+  --            , _cnng_ngrams_id row
+  --            , _cnng_ngramsType row
+  --            , _cnng_weight row
+  --            , _cnng_doc_count row)
 ------------------------------------------------------------------------
 -- | todo add limit and offset and order
 searchInCorpus :: HasDBid NodeType
@@ -209,4 +283,3 @@ queryContactViaDoc =
                  )
                ) -> Column SqlBool
      cond45 (doc, (corpus, (_,(_,_)))) = doc^.cs_id .== corpus^.nc_context_id
--- a/src/Gargantext/Database/Query/Facet.hs
+++ b/src/Gargantext/Database/Query/Facet.hs
@@ -103,7 +103,7 @@ data Facet id created title hyperdata category ngramCount score =
              , facetDoc_score      :: score
              } deriving (Show, Generic)
 {- | TODO after demo
-data Facet id date hyperdata score = 
+data Facet id date hyperdata score =
     FacetDoc { facetDoc_id        :: id
              , facetDoc_date      :: date
              , facetDoc_hyperdata :: hyperdata
@@ -163,7 +163,7 @@ type FacetPairedReadNull = FacetPaired (Column (Nullable SqlInt4)       )
                                       (Column (Nullable SqlInt4)       )
 type FacetPairedReadNullAgg = FacetPaired (Aggregator (Column (Nullable SqlInt4)       )
-                                                      (Column (Nullable SqlInt4)       ) 
+                                                      (Column (Nullable SqlInt4)       )
                                          )
                                          (Aggregator (Column (Nullable SqlTimestamptz))
                                                      (Column (Nullable SqlTimestamptz))

--- a/src/Gargantext/Database/Query/Join.hs
+++ b/src/Gargantext/Database/Query/Join.hs
@@ -43,7 +43,7 @@ import Opaleye.Internal.Join (NullMaker(..))
 import qualified Opaleye.Internal.Unpackspec()
-keepWhen :: (a -> Field SqlBool) -> SelectArr a a  
+keepWhen :: (a -> Field SqlBool) -> SelectArr a a
 keepWhen p = proc a -> do
  restrict  -< p a
  returnA -< a
@@ -61,7 +61,7 @@ leftJoin2 = leftJoin
 ------------------------------------------------------------------------
 -- | LeftJoin3 in two ways to write it
 leftJoin3 :: Select columnsA -> Select columnsB -> Select columnsC
-      -> ((columnsA, columnsB, columnsC) -> Column SqlBool) 
+      -> ((columnsA, columnsB, columnsC) -> Column SqlBool)
      -> Select (columnsA, columnsB, columnsC)
 leftJoin3 q1 q2 q3 cond = ((,,) <$> q1 <*> q2 <*> q3) >>> keepWhen cond
@@ -82,7 +82,7 @@ leftJoin4' :: Select columnsA
           -> Select columnsB
           -> Select columnsC
           -> Select columnsD
-      -> ((columnsA, columnsB, columnsC, columnsD) -> Column SqlBool) 
+      -> ((columnsA, columnsB, columnsC, columnsD) -> Column SqlBool)
      -> Select (columnsA, columnsB, columnsC, columnsD)
 leftJoin4' q1 q2 q3 q4 cond = ((,,,) <$> q1 <*> q2 <*> q3 <*> q4) >>> keepWhen cond
@@ -375,4 +375,3 @@ leftJoin9   q1 q2    q3     q4     q5     q6      q7    q8     q9
                  ) cond67
                ) cond78
              ) cond89
--- a/src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
+++ b/src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
 -- | Insert utils
 insertContextNodeNgrams :: [ContextNodeNgrams] -> Cmd err Int
 insertContextNodeNgrams = insertContextNodeNgramsW
-                     . map (\(ContextNodeNgrams c n ng nt w) ->
+                     . map (\(ContextNodeNgrams c n ng nt w dc) ->
                              ContextNodeNgrams (pgContextId c)
                                                (pgNodeId n)
                                                (sqlInt4  ng)
                                                (pgNgramsTypeId nt)
                                                (sqlDouble w)
+                                                (sqlInt4 dc)
                            )
 insertContextNodeNgramsW :: [ContextNodeNgramsWrite] -> Cmd err Int

--- a/src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
+++ b/src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
 updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64
 updateHyperdataQuery i h = seq h' $ trace "updateHyperdataQuery: encoded JSON" $ Update
   { uTable      = nodeTable
-   , uUpdateWith = updateEasy (\  (Node _ni _nh _nt _nu _np _nn _nd _h)
+   , uUpdateWith = updateEasy (\  (Node { .. })
-                                -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
+                                -> Node { _node_hyperdata = h', .. }
+                               -- -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
                              )
   , uWhere      = (\row -> {-trace "uWhere" $-} _node_id row .== pgNodeId i )
   , uReturning  = rCount
@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
 updateNodesWithType_ nt h = do
  ns <- getNodesIdWithType nt
  mapM (\n -> updateHyperdata n h) ns
--- a/src/Gargantext/Database/Schema/ContextNodeNgrams.hs
+++ b/src/Gargantext/Database/Schema/ContextNodeNgrams.hs
@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
  where
 import Prelude
+import Gargantext.Core.Types (TermsCount)
 import Gargantext.Database.Schema.Prelude
 import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId)
 import Gargantext.Database.Admin.Types.Node
@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node
 type ContextNodeNgrams =
-  ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double
+  ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double TermsCount
-data ContextNodeNgramsPoly c n ngrams_id ngt w
+data ContextNodeNgramsPoly c n ngrams_id ngt w dc
   = ContextNodeNgrams { _cnng_context_id   :: !c
                       , _cnng_node_id      :: !n
                       , _cnng_ngrams_id    :: !ngrams_id
                       , _cnng_ngramsType   :: !ngt
                       , _cnng_weight       :: !w
+                       , _cnng_doc_count    :: !dc
                       } deriving (Show)
 type ContextNodeNgramsWrite =
@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
                           (Column SqlInt4  )
                           (Column SqlInt4  )
                           (Column SqlFloat8)
+                           (Column SqlInt4  )
 type ContextNodeNgramsRead  =
     ContextNodeNgramsPoly (Column SqlInt4  )
@@ -50,6 +53,7 @@ type ContextNodeNgramsRead  =
                           (Column SqlInt4  )
                           (Column SqlInt4  )
                           (Column SqlFloat8)
+                           (Column SqlInt4  )
 type ContextNodeNgramsReadNull =
     ContextNodeNgramsPoly (Column (Nullable SqlInt4  ))
@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
                           (Column (Nullable SqlInt4  ))
                           (Column (Nullable SqlInt4  ))
                           (Column (Nullable SqlFloat8))
+                           (Column (Nullable SqlInt4  ))
 $(makeAdaptorAndInstance "pContextNodeNgrams" ''ContextNodeNgramsPoly)
 makeLenses ''ContextNodeNgramsPoly
@@ -70,5 +75,6 @@ contextNodeNgramsTable  = Table "context_node_ngrams"
                               , _cnng_ngrams_id  = requiredTableField "ngrams_id"
                               , _cnng_ngramsType = requiredTableField "ngrams_type"
                               , _cnng_weight     = requiredTableField "weight"
+                               , _cnng_doc_count  = requiredTableField "doc_count"
                               }
                          )
--- a/src/Gargantext/Database/Schema/NodeNgrams.hs
+++ b/src/Gargantext/Database/Schema/NodeNgrams.hs
 {-|
 Module      : Gargantext.Database.Schema.NodeNgrams
-Description : 
+Description :
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
@@ -86,7 +86,7 @@ type NgramsClass  = Int
 type NgramsText   = Text
 -- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text 
+-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
 type NodeNgramsW =
  NodeNgramsPoly (Maybe NodeNgramsId) NodeId ListType NgramsText

--- a/stack.yaml
+++ b/stack.yaml
@@ -83,7 +83,7 @@ extra-deps:
 - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
  commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
 - git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
-  commit: 4d6ae5aad435c00cdae1d47ebb5281d13d7b172c
+  commit: a2d78abeaec9315be765b90d5e51a4a50c48e7b8
 #- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git
 #- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588