From 5c4f3ebda8d5b135707cb03a8d687c1d06441646 Mon Sep 17 00:00:00 2001
From: Alfredo Di Napoli <alfredo@well-typed.com>
Date: Thu, 13 Apr 2023 11:58:53 +0200
Subject: [PATCH] reverse order of logs in jobLog (fixes #192)

---
 src/Gargantext/API/Admin/EnvTypes.hs  | 16 ++++++------
 src/Gargantext/API/Node/Corpus/New.hs | 35 ++++++++++-----------------
 src/Gargantext/Utils/Jobs/Internal.hs | 10 ++++++++
 src/Gargantext/Utils/Jobs/Map.hs      |  5 ++--
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/src/Gargantext/API/Admin/EnvTypes.hs b/src/Gargantext/API/Admin/EnvTypes.hs
index 4f967ee8..541f61ae 100644
--- a/src/Gargantext/API/Admin/EnvTypes.hs
+++ b/src/Gargantext/API/Admin/EnvTypes.hs
@@ -18,11 +18,11 @@ module Gargantext.API.Admin.EnvTypes (
   , ConcreteJobHandle -- opaque
   ) where
 
-import Control.Lens hiding ((:>))
+import Control.Lens hiding ((:<))
 import Control.Monad.Except
 import Control.Monad.Reader
 import Data.Pool (Pool)
-import Data.Sequence (Seq, ViewR(..), viewr)
+import Data.Sequence (Seq, ViewL(..), viewl)
 import Database.PostgreSQL.Simple (Connection)
 import GHC.Generics (Generic)
 import Network.HTTP.Client (Manager)
@@ -159,12 +159,12 @@ instance Jobs.MonadJobStatus (GargM Env err) where
       Just j  -> case jTask j of
         QueuedJ _   -> pure noJobLog
         RunningJ rj -> liftIO (rjGetLog rj) <&>
-                         \lgs -> case viewr lgs of
-                                   EmptyR -> noJobLog
-                                   _ :> l -> l
-        DoneJ lgs _ -> pure $ case viewr lgs of
-                                   EmptyR -> noJobLog
-                                   _ :> l -> l
+                         \lgs -> case viewl lgs of
+                                   EmptyL -> noJobLog
+                                   l :< _ -> l
+        DoneJ lgs _ -> pure $ case viewl lgs of
+                                   EmptyL -> noJobLog
+                                   l :< _ -> l
 
   withTracer extraLogger (JobHandle jId logger) n = n (JobHandle jId (\w -> logger w >> liftIO (extraLogger w)))
 
diff --git a/src/Gargantext/API/Node/Corpus/New.hs b/src/Gargantext/API/Node/Corpus/New.hs
index 384e2acd..a6c3fba3 100644
--- a/src/Gargantext/API/Node/Corpus/New.hs
+++ b/src/Gargantext/API/Node/Corpus/New.hs
@@ -20,6 +20,7 @@ module Gargantext.API.Node.Corpus.New
 
 import Conduit
 import Control.Lens hiding (elements, Empty)
+import Control.Monad
 import Data.Aeson
 import Data.Aeson.TH (deriveJSON)
 import qualified Data.ByteString.Base64 as BSB64
@@ -193,7 +194,6 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
                                          , _wq_lang = l
                                          , _wq_flowListWith = flw }) maybeLimit jobHandle = do
   -- TODO ...
-  markStarted 3 jobHandle
   -- printDebug "[addToCorpusWithQuery] (cid, dbs)" (cid, dbs)
   -- printDebug "[addToCorpusWithQuery] datafield" datafield
   -- printDebug "[addToCorpusWithQuery] flowListWith" flw
@@ -202,49 +202,40 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
     Just Web -> do
       -- printDebug "[addToCorpusWithQuery] processing web request" datafield
 
+      markStarted 1 jobHandle
+
       _ <- triggerSearxSearch user cid q l jobHandle
 
       markComplete jobHandle
 
     _ -> do
+      markStarted 3 jobHandle
+
       -- TODO add cid
       -- TODO if cid is folder -> create Corpus
       --      if cid is corpus -> add to corpus
       --      if cid is root   -> create corpus in Private
       -- printDebug "[G.A.N.C.New] getDataText with query" q
-      databaseOrigin <- database2origin dbs
-      eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [databaseOrigin]
+      db   <- database2origin dbs
+      eTxt <- getDataText db (Multi l) q maybeLimit
 
-      let lTxts = lefts eTxts
       -- printDebug "[G.A.N.C.New] lTxts" lTxts
-      case lTxts of
-        [] -> do
-          let txts = rights eTxts
+      case eTxt of
+        Right txt -> do
           -- TODO Sum lenghts of each txt elements
 
-          -- NOTE(adinapoli) Some other weird arithmetic to have the
-          -- following 'JobLog' as output:
-          -- JobLog
-          -- { _scst_succeeded = Just 2
-          -- , _scst_failed    = Just 0
-          -- , _scst_remaining = Just $ 1 + length txts
-          -- , _scst_events    = Just []
-          -- }
-
-          markStarted (3 + length txts) jobHandle
-          markProgress 2 jobHandle
+          markProgress 1 jobHandle
 
-          _cids <- mapM (\txt -> do
-                           flowDataText user txt (Multi l) cid (Just flw) jobHandle) txts
+          void $ flowDataText user txt (Multi l) cid (Just flw) jobHandle
           -- printDebug "corpus id" cids
           -- printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
           sendMail user
           -- TODO ...
           markComplete jobHandle
 
-        (err:_) -> do
+        Left err -> do
           -- printDebug "Error: " err
-          markFailure 1 (Just $ T.pack (show err)) jobHandle
+          markFailed (Just $ T.pack (show err)) jobHandle
 
 type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
    :> "corpus"
diff --git a/src/Gargantext/Utils/Jobs/Internal.hs b/src/Gargantext/Utils/Jobs/Internal.hs
index a929f890..86b1fd14 100644
--- a/src/Gargantext/Utils/Jobs/Internal.hs
+++ b/src/Gargantext/Utils/Jobs/Internal.hs
@@ -118,6 +118,15 @@ pollJob limit offset jid je = do
       let st = either (const SJ.IsFailure) (const SJ.IsFinished) r
           me = either (Just . T.pack . show) (const Nothing) r
       in pure (ls, st, me)
+  -- /NOTE/: We need to be careful with the ordering of the logs here:
+  -- we want to return the logs ordered from the newest to the oldest,
+  -- because the API will use 'limit' to show only the newest ones,
+  -- taking 'limit' of them from the front of the list.
+  --
+  -- Due to the fact we do not force any 'Ord' constraint on an 'event' type,
+  -- and it would be inefficient to reverse the list here, it's important
+  -- that the concrete implementation of 'rjGetLog' returns the logs in the
+  -- correct order.
   pure $ SJ.jobStatus jid limit offset (toList logs) status merr
 
 waitJob
@@ -176,4 +185,5 @@ killJob t limit offset jid je = do
           me = either (Just . T.pack . show) (const Nothing) r
       removeJob False t jid
       pure (lgs, st, me)
+  -- /NOTE/: Same proviso as in 'pollJob' applies here.
   pure $ SJ.jobStatus jid limit offset (toList logs) status merr
diff --git a/src/Gargantext/Utils/Jobs/Map.hs b/src/Gargantext/Utils/Jobs/Map.hs
index a537fa48..d2d4417c 100644
--- a/src/Gargantext/Utils/Jobs/Map.hs
+++ b/src/Gargantext/Utils/Jobs/Map.hs
@@ -13,7 +13,6 @@ module Gargantext.Utils.Jobs.Map (
   , newJobMap
   , lookupJob
   , gcThread
-  , jobLog
   , addJobEntry
   , deleteJob
   , runJob
@@ -117,8 +116,10 @@ gcThread js (JobMap mvar) = go
           _      -> False
 
 -- | Make a 'Logger' that 'mappend's monoidal values in a 'TVar'.
+-- /IMPORTANT/: The new value is appended in front. The ordering is important later on
+-- when consuming logs from the API (see for example 'pollJob').
 jobLog :: Semigroup w => TVar w -> Logger w -- w -> IO ()
-jobLog logvar = \w -> atomically $ modifyTVar' logvar (\old_w -> old_w <> w)
+jobLog logvar = \w -> atomically $ modifyTVar' logvar (\old_w -> w <> old_w)
 
 -- | Generating new 'JobEntry's.
 addJobEntry
-- 
2.21.0