[refactoring] ngrams and nodestory refactoring

These are large modules, moving functions around, removing unused ones.
parent 3c2c6136
......@@ -6,6 +6,7 @@ with-compiler: ghc-9.4.7
packages:
./
../openalex
source-repository-package
type: git
......@@ -118,10 +119,10 @@ source-repository-package
location: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
tag: c0a08d62c40a169b7934ceb7cb12c39952160e7a
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: cd179f6dda15d77a085c0176284c921b7bc50c46
-- source-repository-package
-- type: git
-- location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
-- tag: cd179f6dda15d77a085c0176284c921b7bc50c46
source-repository-package
type: git
......
......@@ -57,12 +57,17 @@ services:
ports:
- 9000:9000
johnsnownlp:
image: 'johnsnowlabs/nlp-server:latest'
volumes:
- js-cache:/home/johnsnowlabs/cache_pretrained
spacyapi:
image: docker.io/bbieniek/spacyapi:en_v3
ports:
- 5000:5000
- "127.0.0.1:9080:80"
# johnsnownlp:
# image: 'johnsnowlabs/nlp-server:latest'
# volumes:
# - js-cache:/home/johnsnowlabs/cache_pretrained
# ports:
# - 5000:5000
volumes:
#garg-pgdata:
......
......@@ -123,6 +123,7 @@ library
Gargantext.Core.NLP
Gargantext.Core.NodeStory
Gargantext.Core.NodeStory.DB
Gargantext.Core.NodeStory.Utils
Gargantext.Core.NodeStory.Types
Gargantext.Core.Text
Gargantext.Core.Text.Context
......
......@@ -15,7 +15,7 @@ add get
-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
-- {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ConstraintKinds #-}
......@@ -39,148 +39,41 @@ module Gargantext.API.Ngrams
--, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
, apiNgramsTableDoc
, NgramsTablePatch
, NgramsTableMap
, NgramsTerm(..)
, NgramsElement(..)
, mkNgramsElement
, RootParent(..)
, MSet
, mSetFromList
, mSetToList
, Repo(..)
, r_version
, r_state
, r_history
, NgramsRepoElement(..)
, saveNodeStory
, initRepo
, TabType(..)
, QueryParamR
, TODO
-- Internals
, getNgramsTableMap
, dumpJsonTableMap
, tableNgramsPull
, tableNgramsPut
, getNgramsTable'
, setNgramsTableScores
, Version
, Versioned(..)
, VersionedWithCount(..)
, currentVersion
, listNgramsChangedSince
, MinSize, MaxSize, OrderBy, NgramsTable
, UpdateTableNgramsCharts
)
where
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson.Text qualified as DAT
import Data.Foldable
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), at, _Just, Each(..), (%%~), ifolded, to, withIndex, over)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack)
import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
import Data.Text (isInfixOf, toLower)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Tools (getNgramsTableMap, getNodeStory, setNgramsTableScores, tableNgramsPull)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId)
import Gargantext.API.Prelude (GargM)
import Gargantext.Core.NodeStory (HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.NodeStory.Utils (saveNodeStory)
-- import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, assertValid, HasValidationError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import Gargantext.Prelude.Clock (hasTime, getTime)
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant hiding (Patch)
{-
-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch
ngramsPatch :: Int -> NgramsPatch
ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
toEdit n p = Edit n p
ngramsIdPatch :: Patch NgramsId NgramsPatch
ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
, replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
, replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
]
-- applyPatchBack :: Patch -> IO Patch
-- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
-- TODO: Replace.old is ignored which means that if the current list
-- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
-- the list is going to be `StopTerm` while it should keep `MapTerm`.
-- However this should not happen in non conflicting situations.
mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
mkListsUpdate nt patches =
[ (ngramsTypeId nt, ng, listTypeId lt)
| (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, lt <- patch ^.. patch_list . new
]
mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
-> NgramsType
-> NgramsTablePatch
-> [(NgramsTypeId, NgramsParent, NgramsChild)]
mkChildrenGroups addOrRem nt patches =
[ (ngramsTypeId nt, parent, child)
| (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, child <- patch ^.. patch_children . to addOrRem . folded
]
-}
------------------------------------------------------------------------
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> NodeId -> ArchiveList -> m ()
saveNodeStory nId a = do
saver <- view hasNodeStoryImmediateSaver
liftBase $ saver nId a
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution :: TableNgrams.NgramsType
-> NgramsTerm
-> ConflictResolutionNgramsPatch
......@@ -192,45 +85,6 @@ ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
-- undefined {- TODO think this through -}, listTypeConflictResolution)
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
-- they do not bump version.
insertNewOnly :: a -> Maybe b -> a
insertNewOnly m = maybe m (const $ errorTrace "insertNewOnly: impossible")
-- TODO error handling
{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
=> NodeId -> NodeId -> NgramsType
-> m ()
copyListNgrams srcListId dstListId ngramsType = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType %~ (Just . f . something))
saveNodeStory
where
f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
-- TODO refactor with putListNgrams
-- The list must be non-empty!
-- The added ngrams must be non-existent!
addListNgrams :: RepoCmdM env err m
=> NodeId -> NgramsType
-> [NgramsElement] -> m ()
addListNgrams listId ngramsType nes = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
saveNodeStory
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}
-- | TODO: incr the Version number
-- && should use patch
-- UNSAFE
......@@ -260,17 +114,6 @@ setListNgrams listId ngramsType ns = do
-- saveNodeStory
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n)
| (n,np) <- p ^.. _PatchMap
-- . each . _PatchMap
. each . _NgramsTablePatch
. _PatchMap . ifolded . withIndex
, _ <- np ^.. patch_new . _Just
]
commitStatePatch :: ( HasNodeStory env err m
......@@ -356,26 +199,6 @@ commitStatePatch listId (Versioned _p_version p) = do
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
-- printDebug "[tableNgramsPull]" (listId, ngramsType)
a <- getNodeStory listId
-- r <- liftBase $ atomically $ readTVar var
let
-- a = r ^. unNodeStory . at listId . non initArchive
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just
pure (Versioned (a ^. a_version) q_table)
-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- Apply the given patch to the DB and returns the patch to be applied on the
......@@ -487,26 +310,6 @@ tableNgramsPostChartsAsync utn jobHandle = do
}
-}
getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
a <- getNodeStory nodeId
pure $ Versioned (a ^. a_version)
(a ^. a_state . at ngramsType . _Just)
dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
-> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
m <- getNgramsTableMap nodeId ngramsType
liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
pure ()
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
......@@ -581,7 +384,7 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
where
addSubitemsOccurrences :: NgramsElement -> NgramsElement
addSubitemsOccurrences e =
e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
e & ne_occurrences .~ (foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children))
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
alterOccurrences occs t = case Map.lookup t tblMap of
......@@ -626,35 +429,6 @@ getNgramsTable' nId listId ngramsType = do
tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall env err m t.
( Each t t NgramsElement NgramsElement
, HasNodeStory env err m
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
--printDebug "[setNgramsTableScores] occurrences" occurrences
t2 <- getTime
liftBase $ do
let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms
hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
let
setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure $ table & each %~ setOcc
scoresRecomputeTableNgrams :: forall env err m.
......@@ -674,11 +448,6 @@ scoresRecomputeTableNgrams nId tabType listId = do
-- TODO: find a better place for the code above, All APIs stay here
needsScores :: Maybe OrderBy -> Bool
needsScores (Just ScoreAsc) = True
needsScores (Just ScoreDesc) = True
needsScores _ = False
type TableNgramsApiGet = Summary " Table Ngrams API Get"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
......@@ -810,22 +579,138 @@ apiNgramsAsync _dId =
serveJobsAPI TableNgramsJob $ \jHandle i -> withTracer (printDebug "tableNgramsPostChartsAsync") jHandle $
\jHandle' -> tableNgramsPostChartsAsync i jHandle'
-- Did the given list of ngrams changed since the given version?
-- The returned value is versioned boolean value, meaning that one always retrieve the
-- latest version.
-- If the given version is negative then one simply receive the latest version and True.
-- Using this function is more precise than simply comparing the latest version number
-- with the local version number. Indeed there might be no change to this particular list
-- and still the version number has changed because of other lists.
--
-- Here the added value is to make a compromise between precision, computation, and bandwidth:
-- * currentVersion: good computation, good bandwidth, bad precision.
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
Versioned <$> currentVersion listId <*> pure True
| otherwise =
tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
-- -- Did the given list of ngrams changed since the given version?
-- -- The returned value is versioned boolean value, meaning that one always retrieve the
-- -- latest version.
-- -- If the given version is negative then one simply receive the latest version and True.
-- -- Using this function is more precise than simply comparing the latest version number
-- -- with the local version number. Indeed there might be no change to this particular list
-- -- and still the version number has changed because of other lists.
-- --
-- -- Here the added value is to make a compromise between precision, computation, and bandwidth:
-- -- * currentVersion: good computation, good bandwidth, bad precision.
-- -- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- -- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
-- listNgramsChangedSince :: HasNodeStory env err m
-- => ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
-- listNgramsChangedSince listId ngramsType version
-- | version < 0 =
-- Versioned <$> currentVersion listId <*> pure True
-- | otherwise =
-- tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
-----------------------------
-- Helper functions
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n)
| (n,np) <- p ^.. _PatchMap
-- . each . _PatchMap
. each . _NgramsTablePatch
. _PatchMap . ifolded . withIndex
, _ <- np ^.. patch_new . _Just
]
-------------------------
-- Old commented out code
{-
-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch
ngramsPatch :: Int -> NgramsPatch
ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
toEdit n p = Edit n p
ngramsIdPatch :: Patch NgramsId NgramsPatch
ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
, replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
, replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
]
-- applyPatchBack :: Patch -> IO Patch
-- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
-- TODO: Replace.old is ignored which means that if the current list
-- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
-- the list is going to be `StopTerm` while it should keep `MapTerm`.
-- However this should not happen in non conflicting situations.
mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
mkListsUpdate nt patches =
[ (ngramsTypeId nt, ng, listTypeId lt)
| (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, lt <- patch ^.. patch_list . new
]
mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
-> NgramsType
-> NgramsTablePatch
-> [(NgramsTypeId, NgramsParent, NgramsChild)]
mkChildrenGroups addOrRem nt patches =
[ (ngramsTypeId nt, parent, child)
| (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, child <- patch ^.. patch_children . to addOrRem . folded
]
-}
------------------------------------------------------------------------
-- listTypeConflictResolution :: ListType -> ListType -> ListType
-- listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
-- they do not bump version.
-- insertNewOnly :: a -> Maybe b -> a
-- insertNewOnly m = maybe m (const $ errorTrace "insertNewOnly: impossible")
-- -- TODO error handling
{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
=> NodeId -> NodeId -> NgramsType
-> m ()
copyListNgrams srcListId dstListId ngramsType = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType %~ (Just . f . something))
saveNodeStory
where
f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
-- TODO refactor with putListNgrams
-- The list must be non-empty!
-- The added ngrams must be non-existent!
addListNgrams :: RepoCmdM env err m
=> NodeId -> NgramsType
-> [NgramsElement] -> m ()
addListNgrams listId ngramsType nes = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
saveNodeStory
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}
-- needsScores :: Maybe OrderBy -> Bool
-- needsScores (Just ScoreAsc) = True
-- needsScores (Just ScoreDesc) = True
-- needsScores _ = False
......@@ -21,7 +21,7 @@ import Data.Map.Strict (fromList)
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Data.Validity
import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.API.Ngrams.Tools (getNgramsTableMap)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.Context (TermList)
......
......@@ -13,29 +13,49 @@ Portability : POSIX
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.Tools
( filterListWithRoot
, getCoocByNgrams
, getCoocByNgrams'
, getCoocByNgrams''
, getListNgrams
, getNgramsTableMap
, getNodeStory
, getRepo
, getTermsWith
, groupNodesByNgrams
, mapTermListRoot
, mergeNgramsElement
, setNgramsTableScores
, tableNgramsPull
-- debugging
, dumpJsonTableMap
)
where
-- import Gargantext.Core.NodeStoryFile qualified as NSF
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader
import Control.Lens (_Just, (^.), (^..), (%~), (.~), at, each, msumOf, view, At, Each, Index, IxValue)
import Data.Aeson.Text qualified as DAT
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Validity
-- import GHC.Conc (TVar, readTVar)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, NodeListStory, a_history, a_state, a_version, hasNodeStory, nse_getter, nse_getter_multi, unNodeStory)
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Gargantext.Prelude hiding ((%))
import Gargantext.Prelude.Clock (hasTime, getTime)
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew
type RootTerm = NgramsTerm
getRepo :: HasNodeStory env err m
=> [ListId] -> m NodeListStory
......@@ -47,16 +67,6 @@ getRepo listIds = do
-- pure $ v'
repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
-> NodeId
-> Map.Map k1 Int
repoSize repo node_id = Map.map Map.size state'
where
state' = repo ^. unNodeStory
. at node_id . _Just
. a_state
getNodeStory :: HasNodeStory env err m
=> ListId -> m ArchiveList
getNodeStory l = do
......@@ -136,17 +146,6 @@ mapTermListRoot nodeIds ngramsType repo =
filterListWithRootHashMap :: ListType
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panicTrace $ "[Garg.API.Ngrams.Tools] filterListWithRootHashMap, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
filterListWithRoot :: [ListType]
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
......@@ -175,8 +174,6 @@ groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
Nothing -> (t, ns)
Just r' -> (r',ns)
data Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal
-> HashMap NgramsTerm (Set NodeId)
-> HashMap (NgramsTerm, NgramsTerm) Int
......@@ -233,7 +230,82 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
a <- getNodeStory nodeId
pure $ Versioned (a ^. a_version)
(a ^. a_state . at ngramsType . _Just)
dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
-> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
m <- getNgramsTableMap nodeId ngramsType
liftBase $ DTL.writeFile (T.unpack fpath) (DAT.encodeToLazyText m)
pure ()
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall env err m t.
( Each t t NgramsElement NgramsElement
, HasNodeStory env err m
, HasNodeError err )
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
--printDebug "[setNgramsTableScores] occurrences" occurrences
t2 <- getTime
liftBase $ do
let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms
hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
let
setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences)
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure $ table & each %~ setOcc
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
-- printDebug "[tableNgramsPull]" (listId, ngramsType)
a <- getNodeStory listId
-- r <- liftBase $ atomically $ readTVar var
let
-- a = r ^. unNodeStory . at listId . non initArchive
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just
pure (Versioned (a ^. a_version) q_table)
------------------------------------------
-- Unused functions
-- migrateFromDirToDb :: (HasNodeStory env err m) -- , HasNodeStory env err m)
......@@ -253,3 +325,26 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
-- ) $ Map.toList nls
-- --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
-- pure ()
-- filterListWithRootHashMap :: ListType
-- -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-- -> HashMap NgramsTerm (Maybe RootTerm)
-- filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
-- where
-- isMapTerm (l, maybeRoot) = case maybeRoot of
-- Nothing -> l == lt
-- Just r -> case HM.lookup r m of
-- Nothing -> panicTrace $ "[Garg.API.Ngrams.Tools] filterListWithRootHashMap, unknown key: " <> unNgramsTerm r
-- Just (l',_) -> l' == lt
-- repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
-- -> NodeId
-- -> Map.Map k1 Int
-- repoSize repo node_id = Map.map Map.size state'
-- where
-- state' = repo ^. unNodeStory
-- . at node_id . _Just
-- . a_state
......@@ -15,7 +15,108 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS -fno-warn-orphans #-}
module Gargantext.API.Ngrams.Types where
module Gargantext.API.Ngrams.Types
( AddRem
, addPatch
, ConflictResolutionNgramsPatch
, Diagonal(..)
, QueryParamR
, mkNgramsTablePatch
, MSet(..)
, mSetFromList
, mSetFromSet
, mSetToList
, mSetToSet
, NgramsElement(..)
, mkNgramsElement
, ne_children
, ne_list
, ne_ngrams
, ne_occurrences
, ne_parent
, ne_root
, ne_size
, NgramsList
, NgramsPatch(..)
, patch_list
, patch_new
, patch_old
, NgramsRepoElement(..)
, nre_children
, nre_list
, nre_parent
, nre_root
, nre_size
, NgramsSearchQuery(..)
, NgramsTable(..)
, _NgramsTable
, mockTable
, NgramsTableMap
, NgramsTablePatch(..)
, _NgramsTablePatch
, NgramsTerm(..)
, ngramsElementFromRepo
, ngramsElementToRepo
, ngramsTypeFromTabType
, OrderBy(..)
, PatchMap
, _PatchMap
, PatchMSet(..)
, unPatchMSet
, PatchSet(..)
, add
, rem
, Repo(..)
, initRepo
, r_version
, r_state
, r_history
, RepoCmdM
, RootParent(..)
, rp_parent
, RootTerm
, TabType(..)
, UpdateTableNgramsCharts(..)
, utn_list_id
, utn_tab_type
, Version
, Versioned(..)
, v_data
, v_version
, VersionedWithCount(..)
, toVersionedWithCount
, vc_count
, vc_data
, vc_version
)
where
import Codec.Serialise (Serialise())
import Control.Category ((>>>))
......@@ -43,7 +144,7 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude hiding (IsString, hash, from, rem, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Servant hiding (Patch)
import Servant.Job.Utils (jsonOptions)
......@@ -54,6 +155,13 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
type QueryParamR = QueryParam' '[Required, Strict]
type RootTerm = NgramsTerm
data Diagonal = Diagonal Bool
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
data TabType = Docs | Trash | MoreFav | MoreTrash
......
......@@ -32,7 +32,8 @@ import Data.Text.Encoding qualified as TE
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Ngrams (commitStatePatch)
import Gargantext.API.Ngrams.Types (Versioned(..))
import Gargantext.API.Node.Corpus.New.Types
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
......
......@@ -27,7 +27,8 @@ import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Ngrams (commitStatePatch)
import Gargantext.API.Ngrams.Types (Versioned(..))
import Gargantext.API.Prelude (GargM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.NodeStory (HasNodeStoryImmediateSaver, HasNodeArchiveStoryImmediateSaver, currentVersion)
......
{-|
Module : Gargantext.Core.NodeStory.Utils
Description : NodeStory utilities
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.NodeStory.Utils
( saveNodeStory )
where
import Control.Lens (view)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (NodeId)
import Gargantext.Prelude
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> NodeId -> ArchiveList -> m ()
saveNodeStory nId a = do
saver <- view hasNodeStoryImmediateSaver
liftBase $ saver nId a
......@@ -20,8 +20,7 @@ module Gargantext.Core.Text.List.Merge
import Control.Lens (view)
import Data.Map.Strict.Patch hiding (PatchMap)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types (NgramsRepoElement, NgramsTerm, PatchMap, Versioned(..), v_data, v_version)
import Gargantext.Prelude hiding (diff)
type List = Map NgramsTerm NgramsRepoElement
......
......@@ -21,32 +21,33 @@ module Gargantext.Core.Viz.Graph.API
import Control.Lens (set, (^.), _Just, (^?), at)
import Data.Aeson
import Data.HashMap.Strict qualified as HashMap
import Data.Swagger
import Data.Swagger (ToSchema)
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Prelude
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, getCoocByNgrams'', getRepo, groupNodesByNgrams, mapTermListRoot)
import Gargantext.API.Ngrams.Types (Diagonal(..))
import Gargantext.API.Prelude (GargM, GargServer)
import Gargantext.Core.Methods.Similarities (Similarity(..), GraphMetric(..), withMetric)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main
import Gargantext.Core.NodeStory (HasNodeStory, NodeListStory, a_version, unNodeStory)
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Core.Viz.Graph.Types
import Gargantext.Core.Viz.Graph.Types (Graph, GraphMetadata(..), HyperdataGraph(..), HyperdataGraphAPI(..), LegendField(..), ListForGraph(..), MultiPartite(..), Partite(..), Strength(Strong), hyperdataGraph, gm_edgesStrength, gm_list, gm_metric, gm_startForceAtlas, graph_metadata, hyperdataCamera, lfg_version)
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeId, NodeType(NodeCorpus, NodeGraph, NodeList), UserId)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node (defaultList, getClosestParentIdByType, getNodeWith, getOrMkList)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_hyperdata, node_name)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Servant ((:>), (:<|>)(..), Get, Header, Headers, JSON, Post, ReqBody, ServerT, Summary, addHeader)
import Servant.Job.Async (AsyncJobsAPI)
import Servant.XML.Conduit (XML)
......@@ -58,7 +59,7 @@ type GraphAPI = Get '[JSON] HyperdataGraphAPI
:<|> "clone"
:> ReqBody '[JSON] HyperdataGraphAPI
:> Post '[JSON] NodeId
:<|> "gexf" :> Get '[XML] (Headers '[Servant.Header "Content-Disposition" Text] Graph)
:<|> "gexf" :> Get '[XML] (Headers '[Header "Content-Disposition" Text] Graph)
:<|> "versions" :> GraphVersionsAPI
data GraphVersions =
......@@ -337,10 +338,10 @@ graphClone userId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
------------------------------------------------------------
--getGraphGexf :: UserId
-- -> NodeId
-- -> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
-- -> GargNoServer (Headers '[Header "Content-Disposition" Text] Graph)
getGraphGexf :: HasNodeStory env err m
=> NodeId
-> m (Headers '[Servant.Header "Content-Disposition" Text] Graph)
-> m (Headers '[Header "Content-Disposition" Text] Graph)
getGraphGexf nId = do
HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph nId
pure $ addHeader "attachment; filename=graph.gexf" graph
......@@ -23,10 +23,10 @@ import Data.List qualified as List
import Data.Map.Strict (toList)
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory, a_history, a_state, a_version)
import Gargantext.Core.NodeStory.Utils (saveNodeStory)
import Gargantext.Core.Types (HasValidationError(..), assertValid)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Admin.Types.Node
......
......@@ -27,7 +27,8 @@ import Database.PostgreSQL.Simple (Query, Only(..))
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, getCoocByNgrams, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types (Diagonal(..), RootTerm)
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core (HasDBid(toDBid))
import Gargantext.Core.NodeStory
......
......@@ -20,11 +20,12 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Set qualified as Set
import Database.PostgreSQL.Simple qualified as PSQL
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, saveNodeStory)
import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root)
import Gargantext.API.Ngrams (commitStatePatch, setListNgrams)
import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, mSetFromList, nre_children, nre_list, nre_parent, nre_root)
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.Core.NodeStory hiding (runPGSQuery)
import Gargantext.Core.Types.Individu
import Gargantext.Core.NodeStory (ArchiveList, a_state, a_version, currentVersion, initArchive)
import Gargantext.Core.NodeStory.Utils (saveNodeStory)
import Gargantext.Core.Types.Individu ()
import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster)
......@@ -124,7 +125,7 @@ insertNewTermsToNodeStoryTest env = do
a <- getNodeStory listId
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
a `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nls )
-- check that the ngrams are in the DB as well
ngramsMap <- selectNgramsId [unNgramsTerm terms]
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms]
......@@ -154,7 +155,7 @@ insertNewTermsWithChildrenToNodeStoryTest env = do
a <- getNodeStory listId
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
a `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nls )
-- `setListNgrams` calls saveNodeStory already so we should have
-- the terms in the DB by now
......@@ -196,7 +197,7 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do
a <- getNodeStory listId
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nlsWithChildFixed })
a `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nlsWithChildFixed )
ngramsMap <- selectNgramsId terms
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` terms
......@@ -228,7 +229,7 @@ setListNgramsUpdatesNodeStoryTest env = do
a <- getNodeStory listId
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
a `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nls )
-- check that the ngrams are in the DB as well
ngramsMap <- selectNgramsId [unNgramsTerm terms]
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms]
......@@ -245,7 +246,7 @@ setListNgramsUpdatesNodeStoryTest env = do
a' <- getNodeStory listId
liftIO $ do
a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms $ nls <> nls2 })
a' `shouldBe` (initArchiveList & a_state .~ (Map.singleton NgramsTerms $ nls <> nls2 ) )
setListNgramsUpdatesNodeStoryWithChildrenTest :: TestEnv -> Assertion
......@@ -261,7 +262,7 @@ setListNgramsUpdatesNodeStoryWithChildrenTest env = do
a <- getNodeStory listId
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
a `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nls )
-- OK, now we substitute parent with no children, the parent of
-- 'nreChild' should become Nothing
......@@ -276,7 +277,7 @@ setListNgramsUpdatesNodeStoryWithChildrenTest env = do
a' <- getNodeStory listId
liftIO $ do
a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nlsNew })
a' `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nlsNew )
commitPatchSimpleTest :: TestEnv -> Assertion
......@@ -286,7 +287,7 @@ commitPatchSimpleTest env = do
-- initially, the node story table is empty
liftIO $ do
a `shouldBe` (initArchiveList { _a_state = Map.empty })
a `shouldBe` (initArchiveList & a_state .~ Map.empty )
let (term, nre) = simpleTerm
let tPatch = NgramsReplace { _patch_old = Nothing
......@@ -303,5 +304,5 @@ commitPatchSimpleTest env = do
a' <- getNodeStory listId
liftIO $ do
a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls
, _a_version = ver + 1 })
a' `shouldBe` (initArchiveList & a_state .~ Map.singleton NgramsTerms nls
& a_version .~ ver + 1 )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment