Commit 1b2ff615 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[nodeStories] node stories in db work now

parent d7ca3aee
Pipeline #3009 failed with stage
in 53 minutes and 55 seconds
......@@ -161,6 +161,7 @@ library
Gargantext.Core.Methods.Graph.BAC.Proxemy
Gargantext.Core.Methods.Graph.MaxClique
Gargantext.Core.Methods.Matrix.Accelerate.Utils
Gargantext.Core.NodeStoryFile
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
......@@ -269,7 +270,6 @@ library
Gargantext.Database.Admin.Types.Hyperdata.User
Gargantext.Database.Admin.Types.Metrics
Gargantext.Database.GargDB
Gargantext.Database.NodeStory
Gargantext.Database.Query
Gargantext.Database.Query.Facet
Gargantext.Database.Query.Filter
......
......@@ -27,7 +27,7 @@ import Data.Maybe (fromMaybe)
import Data.Pool (Pool, createPool)
import Database.PostgreSQL.Simple (Connection, connect, close, ConnectInfo)
import Gargantext.Core.NodeStory
import Gargantext.Prelude.Config (GargConfig(..), {-gc_repofilepath,-} readConfig)
import Gargantext.Prelude.Config ({-GargConfig(..),-} {-gc_repofilepath,-} readConfig)
import Network.HTTP.Client.TLS (newTlsManager)
import Servant.Auth.Server (defaultJWTSettings, CookieSettings(..), XsrfCookieSettings(..), defaultCookieSettings, defaultXsrfCookieSettings, readKey, writeKey)
import Servant.Client (parseBaseUrl)
......@@ -180,7 +180,8 @@ newEnv port file = do
self_url_env <- parseBaseUrl $ "http://0.0.0.0:" <> show port
dbParam <- databaseParameters file
pool <- newPool dbParam
nodeStory_env <- readNodeStoryEnv (_gc_repofilepath config_env)
--nodeStory_env <- readNodeStoryEnv (_gc_repofilepath config_env)
nodeStory_env <- readNodeStoryEnv pool
scrapers_env <- newJobEnv defaultSettings manager_env
logger <- newStderrLoggerSet defaultBufSize
config_mail <- Mail.readConfig file
......
......@@ -22,7 +22,7 @@ import Gargantext.API.Prelude
import Gargantext.Core.NodeStory
import Gargantext.Database.Prelude
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Gargantext.Prelude.Config (readConfig)
import qualified Gargantext.Prelude.Mail as Mail
import Servant
import System.IO (FilePath)
......@@ -38,8 +38,9 @@ withDevEnv iniPath k = do
newDevEnv = do
cfg <- readConfig iniPath
dbParam <- databaseParameters iniPath
nodeStory_env <- readNodeStoryEnv (_gc_repofilepath cfg)
--nodeStory_env <- readNodeStoryEnv (_gc_repofilepath cfg)
pool <- newPool dbParam
nodeStory_env <- readNodeStoryEnv pool
setts <- devSettings devJwkFile
mail <- Mail.readConfig iniPath
pure $ DevEnv
......@@ -61,7 +62,7 @@ runCmdReplServantErr = runCmdRepl
-- the command.
-- This function is constrained to the DevEnv rather than
-- using HasConnectionPool and HasRepoVar.
runCmdDev :: (Show err) => DevEnv -> Cmd'' DevEnv err a -> IO a
runCmdDev :: Show err => DevEnv -> Cmd'' DevEnv err a -> IO a
runCmdDev env f =
(either (fail . show) pure =<< runCmd env f)
`finally`
......
......@@ -106,7 +106,7 @@ import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
......@@ -261,7 +261,9 @@ setListNgrams listId ngramsType ns = do
currentVersion :: HasNodeStory env err m
=> ListId -> m Version
currentVersion listId = do
nls <- getRepo [listId]
--nls <- getRepo [listId]
pool <- view connPool
nls <- liftBase $ getNodeStory pool listId
pure $ nls ^. unNodeStory . at listId . _Just . a_version
......
......@@ -22,7 +22,8 @@ import Data.Hashable (Hashable)
import Data.Set (Set)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
......@@ -30,6 +31,7 @@ import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Gargantext.Core.NodeStory
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew
......@@ -193,3 +195,19 @@ getCoocByNgrams' f (Diagonal diag) m =
where ks = HM.keys m
------------------------------------------
migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m)
=> m ()
migrateFromDirToDb = do
pool <- view connPool
listIds <- liftBase $ getNodesIdWithType pool NodeList
(NodeStory nls) <- getRepo listIds
_ <- mapM (\(nId, a) -> do
n <- liftBase $ nodeExists pool nId
case n of
False -> pure 0
True -> liftBase $ upsertNodeArchive pool nId a
) $ Map.toList nls
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
pure ()
This diff is collapsed.
{- NOTE This is legacy code. It keeps node stories in a directory
repo. We now have migrated to the DB. However this code is needed to
make the migration (see Gargantext.API.Ngrams.Tools) -}
module Gargantext.Core.NodeStoryFile where
import Control.Lens (view)
import Control.Monad (foldM)
import Codec.Serialise (serialise, deserialise)
import Codec.Serialise.Class
import Control.Concurrent (MVar(), modifyMVar_, newMVar, readMVar, withMVar)
import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListId, NodeId(..))
import Gargantext.Prelude
import System.Directory (renameFile, createDirectoryIfMissing, doesFileExist, removeFile)
import System.IO (FilePath, hClose)
import System.IO.Temp (withTempFile)
import qualified Data.ByteString.Lazy as DBL
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
getRepo :: HasNodeStory env err m
=> [ListId] -> m NodeListStory
getRepo listIds = do
f <- getNodeListStory
v <- liftBase $ f listIds
v' <- liftBase $ readMVar v
pure $ v'
getNodeListStory :: HasNodeStory env err m
=> m ([NodeId] -> IO (MVar NodeListStory))
getNodeListStory = do
env <- view hasNodeStory
pure $ view nse_getter env
readNodeStoryEnv :: NodeStoryDir -> IO NodeStoryEnv
readNodeStoryEnv nsd = do
mvar <- nodeStoryVar nsd Nothing []
saver <- mkNodeStorySaver nsd mvar
pure $ NodeStoryEnv { _nse_var = mvar
, _nse_saver = saver
, _nse_getter = nodeStoryVar nsd (Just mvar) }
------------------------------------------------------------------------
mkNodeStorySaver :: NodeStoryDir -> MVar NodeListStory -> IO (IO ())
mkNodeStorySaver nsd mvns = mkDebounce settings
where
settings = defaultDebounceSettings
{ debounceAction = withMVar mvns (writeNodeStories nsd)
, debounceFreq = 1 * minute
-- , debounceEdge = trailingEdge -- Trigger on the trailing edge
}
minute = 60 * second
second = 10^(6 :: Int)
nodeStoryVar :: NodeStoryDir
-> Maybe (MVar NodeListStory)
-> [NodeId]
-> IO (MVar NodeListStory)
nodeStoryVar nsd Nothing ni = nodeStoryIncs nsd Nothing ni >>= newMVar
nodeStoryVar nsd (Just mv) ni = do
_ <- modifyMVar_ mv $ \mv' -> (nodeStoryIncs nsd (Just mv') ni)
pure mv
nodeStoryInc :: NodeStoryDir -> Maybe NodeListStory -> NodeId -> IO NodeListStory
nodeStoryInc nsd (Just ns@(NodeStory nls)) ni = do
case Map.lookup ni nls of
Nothing -> do
(NodeStory nls') <- nodeStoryRead nsd ni
pure $ NodeStory $ Map.union nls nls'
Just _ -> pure ns
nodeStoryInc nsd Nothing ni = nodeStoryRead nsd ni
nodeStoryIncs :: NodeStoryDir
-> Maybe NodeListStory
-> [NodeId]
-> IO NodeListStory
nodeStoryIncs _ Nothing [] = pure $ NodeStory $ Map.empty
nodeStoryIncs nsd (Just nls) ns = foldM (\m n -> nodeStoryInc nsd (Just m) n) nls ns
nodeStoryIncs nsd Nothing (ni:ns) = do
m <- nodeStoryRead nsd ni
nodeStoryIncs nsd (Just m) ns
nodeStoryDec :: NodeStoryDir
-> NodeListStory
-> NodeId
-> IO NodeListStory
nodeStoryDec nsd ns@(NodeStory nls) ni = do
case Map.lookup ni nls of
Nothing -> do
-- we make sure the corresponding file repo is really removed
_ <- nodeStoryRemove nsd ni
pure ns
Just _ -> do
let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
_ <- nodeStoryRemove nsd ni
pure $ NodeStory ns'
-- | TODO lock
nodeStoryRead :: NodeStoryDir -> NodeId -> IO NodeListStory
nodeStoryRead nsd ni = do
_repoDir <- createDirectoryIfMissing True nsd
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then deserialise <$> DBL.readFile nsp
else pure (initNodeStory ni)
nodeStoryRemove :: NodeStoryDir -> NodeId -> IO ()
nodeStoryRemove nsd ni = do
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then removeFile nsp
else pure ()
nodeStoryRead_test :: NodeStoryDir -> NodeId -> IO (Maybe [ TableNgrams.NgramsType ])
nodeStoryRead_test nsd ni = nodeStoryRead nsd ni >>= \n -> pure
$ fmap Map.keys
$ fmap _a_state
$ Map.lookup ni
$ _unNodeStory n
------------------------------------------------------------------------
type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do
_done <- mapM (writeNodeStory fp) $ splitByNode nls
-- printDebug "[writeNodeStories]" done
pure ()
writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
writeNodeStory rdfp (n, ns) = saverAction' rdfp n ns
splitByNode :: NodeListStory -> [(NodeId, NodeListStory)]
splitByNode (NodeStory m) =
List.map (\(n,a) -> (n, NodeStory $ Map.singleton n a)) $ Map.toList m
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do
withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
-- printDebug "[repoSaverAction]" fp
DBL.hPut h $ serialise a
hClose h
renameFile fp (nodeStoryPath repoDir nId)
nodeStoryPath :: NodeStoryDir -> NodeId -> FilePath
nodeStoryPath repoDir nId = repoDir <> "/" <> filename
where
filename = "repo" <> "-" <> (cs $ show nId) <> ".cbor"
------------------------------------------------------------------------
-- TODO : repo Migration TODO TESTS
{-
repoMigration :: NodeStoryDir -> NgramsRepo -> IO ()
repoMigration fp r = writeNodeStories fp (repoToNodeListStory r)
repoToNodeListStory :: NgramsRepo -> NodeListStory
repoToNodeListStory (Repo _v s h) = NodeStory $ Map.fromList ns
where
s' = ngramsState_migration s
h' = ngramsStatePatch_migration h
ns = List.map (\(n,ns')
-> (n, let hs = fromMaybe [] (Map.lookup n h') in
Archive { _a_version = List.length hs
, _a_state = ns'
, _a_history = hs }
)
) $ Map.toList s'
ngramsState_migration :: NgramsState
-> Map NodeId NgramsState'
ngramsState_migration ns =
Map.fromListWith (Map.union) $
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, Map.singleton nt table)
) $ Map.toList nTable
) $ Map.toList ns
ngramsStatePatch_migration :: [NgramsStatePatch]
-> Map NodeId [NgramsStatePatch']
ngramsStatePatch_migration np' = Map.fromListWith (<>)
$ List.concat
$ map toPatch np'
where
toPatch :: NgramsStatePatch -> [(NodeId, [NgramsStatePatch'])]
toPatch p =
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, [fst $ Patch.singleton nt table])
) $ Patch.toList nTable
) $ Patch.toList p
-}
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
......
{-# LANGUAGE Arrows #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.NodeStory where
import Control.Arrow (returnA)
import Control.Concurrent.MVar.Lifted (MVar(), withMVar, newMVar, modifyMVar_)
--import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
import Control.Monad (foldM)
import qualified Data.Map.Strict as Map
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Gargantext.API.Ngrams.Tools (getRepo)
import Gargantext.Core (HasDBid)
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory (Archive(..), NodeStory(..), NodeListStory, NgramsState', NgramsStatePatch')
import qualified Gargantext.Core.NodeStory as NS
import Gargantext.Core.Types (NodeId(..), NodeType(..))
import Gargantext.Database.Prelude (CmdM, mkCmd, runOpaQuery)
import Gargantext.Database.Query.Table.Node (getNodesIdWithType, nodeExists)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Prelude
import Opaleye hiding (FromField)
import Opaleye.Internal.Table (Table(..))
data NodeStoryPoly a b = NodeStoryDB { node_id :: a
, archive :: b }
deriving (Eq)
type ArchiveQ = Archive NgramsState' NgramsStatePatch'
type NodeListStoryQ = NodeStoryPoly Int ArchiveQ
type NodeStoryWrite = NodeStoryPoly (Column SqlInt4) (Column SqlJsonb)
type NodeStoryRead = NodeStoryPoly (Column SqlInt4) (Column SqlJsonb)
$(makeAdaptorAndInstance "pNodeStory" ''NodeStoryPoly)
nodeStoryTable :: Table NodeStoryRead NodeStoryWrite
nodeStoryTable =
Table "node_stories"
( pNodeStory NodeStoryDB { node_id = tableField "node_id"
, archive = tableField "archive" } )
nodeStorySelect :: Select NodeStoryRead
nodeStorySelect = selectTable nodeStoryTable
getNodeStory :: CmdM env err m => NodeId -> m NodeListStory
getNodeStory (NodeId nodeId) = do
res <- runOpaQuery query
pure $ NodeStory $ Map.fromListWith (<>) $ (\(NodeStoryDB nId a) -> (nId, a)) <$> res
where
query :: Select NodeStoryRead
query = proc () -> do
row@(NodeStoryDB node_id _) <- nodeStorySelect -< ()
restrict -< node_id .== sqlInt4 nodeId
returnA -< row
insertNodeArchive :: CmdM env err m => NodeId -> ArchiveQ -> m Int64
insertNodeArchive (NodeId nId) a = mkCmd $ \c -> runInsert c insert
where
insert = Insert { iTable = nodeStoryTable
, iRows = [NodeStoryDB { node_id = sqlInt4 nId
, archive = sqlValueJSONB a }]
, iReturning = rCount
, iOnConflict = Nothing }
updateNodeArchive :: CmdM env err m => NodeId -> ArchiveQ -> m Int64
updateNodeArchive (NodeId nId) a = mkCmd $ \c -> runUpdate c update
where
update = Update { uTable = nodeStoryTable
, uUpdateWith = updateEasy (\(NodeStoryDB { .. }) -> NodeStoryDB { archive = sqlValueJSONB a, .. })
, uWhere = (\row -> node_id row .== sqlInt4 nId)
, uReturning = rCount }
nodeStoryRemove :: CmdM env err m => NodeId -> m Int64
nodeStoryRemove (NodeId nId) = mkCmd $ \c -> runDelete c delete
where
delete = Delete { dTable = nodeStoryTable
, dWhere = (\row -> node_id row .== sqlInt4 nId)
, dReturning = rCount }
upsertNodeArchive :: CmdM env err m => NodeId -> ArchiveQ -> m Int64
upsertNodeArchive nId a = do
(NodeStory m) <- getNodeStory nId
case Map.lookup nId m of
Nothing -> insertNodeArchive nId a
Just _ -> updateNodeArchive nId a
writeNodeStories :: CmdM env err m => NodeListStory -> m ()
writeNodeStories (NodeStory nls) = do
_ <- mapM (\(nId, a) -> upsertNodeArchive nId a) $ Map.toList nls
pure ()
-- | Returns a `NodeListStory`, updating the given one for given `NodeId`
nodeStoryInc :: CmdM env err m => Maybe NodeListStory -> NodeId -> m NodeListStory
nodeStoryInc Nothing nId = getNodeStory nId
nodeStoryInc (Just ns@(NodeStory nls)) nId = do
case Map.lookup nId nls of
Nothing -> do
(NodeStory nls') <- getNodeStory nId
pure $ NodeStory $ Map.union nls nls'
Just _ -> pure ns
nodeStoryIncs :: CmdM env err m => Maybe NodeListStory -> [NodeId] -> m NodeListStory
nodeStoryIncs Nothing [] = pure $ NodeStory $ Map.empty
nodeStoryIncs (Just nls) ns = foldM (\m n -> nodeStoryInc (Just m) n) nls ns
nodeStoryIncs Nothing (ni:ns) = do
m <- getNodeStory ni
nodeStoryIncs (Just m) ns
nodeStoryDec :: CmdM env err m => NodeListStory -> NodeId -> m NodeListStory
nodeStoryDec ns@(NodeStory nls) ni = do
case Map.lookup ni nls of
Nothing -> do
_ <- nodeStoryRemove ni
pure ns
Just _ -> do
let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
_ <- nodeStoryRemove ni
pure $ NodeStory ns'
migrateFromDir :: (HasMail env, HasNodeError err, NS.HasNodeStory env err m, HasDBid NodeType)
=> m ()
migrateFromDir = do
listIds <- getNodesIdWithType NodeList
(NodeStory nls) <- getRepo listIds
_ <- mapM (\(nId, a) -> do
n <- nodeExists nId
case n of
False -> pure 0
True -> upsertNodeArchive nId a
) $ Map.toList nls
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
pure ()
------------------------------------
data NodeStoryEnv env err m = NodeStoryEnv
{ _nse_var :: !(MVar NodeListStory)
, _nse_saver :: !(m ())
, _nse_getter :: [NodeId] -> m (MVar NodeListStory)
--, _nse_cleaner :: !(IO ()) -- every 12 hours: cleans the repos of unused NodeStories
-- , _nse_lock :: !FileLock -- TODO (it depends on the option: if with database or file only)
}
--deriving (Generic)
nodeStoryEnv :: CmdM env err m => m (NodeStoryEnv env err m)
nodeStoryEnv = do
mvar <- nodeStoryVar Nothing []
--saver <- mkNodeStorySaver mvar
let saver = mkNodeStorySaver mvar
-- let saver = modifyMVar_ mvar $ \mv' -> do
-- writeNodeStories mv'
-- return mv'
pure $ NodeStoryEnv { _nse_var = mvar
, _nse_saver = saver
, _nse_getter = nodeStoryVar (Just mvar) }
nodeStoryVar :: CmdM env err m => Maybe (MVar NodeListStory) -> [NodeId] -> m (MVar NodeListStory)
nodeStoryVar Nothing nIds = do
state <- nodeStoryIncs Nothing nIds
newMVar state
nodeStoryVar (Just mv) nIds = do
_ <- modifyMVar_ mv $ \nsl -> (nodeStoryIncs (Just nsl) nIds)
pure mv
-- TODO No debounce since this is IO stuff.
-- debounce is useful since it could delay the saving to some later
-- time, asynchronously and we keep operating on memory only.
mkNodeStorySaver :: CmdM env err m => MVar NodeListStory -> m ()
mkNodeStorySaver mvns = withMVar mvns writeNodeStories
-- mkNodeStorySaver :: MVar NodeListStory -> Cmd err (Cmd err ())
-- mkNodeStorySaver mvns = mkDebounce settings
-- where
-- settings = defaultDebounceSettings
-- { debounceAction = withMVar mvns (\ns -> writeNodeStories ns)
-- , debounceFreq = 1 * minute
-- -- , debounceEdge = trailingEdge -- Trigger on the trailing edge
-- }
-- minute = 60 * second
-- second = 10^(6 :: Int)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment