Commit f53b7e45 authored by Alfredo Di Napoli

Sanitise logged requests

This commit introduces a slight variation to the `logStdoutDev`
middleware which is capable of redacting secrets from our gargantext
request logs.

This way we can run the server in dev mode (for instrumentation
purposes) without leaking users' secrets.
parent 3ce0e6a0
Pipeline #5496 failed with stages
in 17 minutes and 57 seconds
......@@ -14,13 +14,13 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
cabal --store-dir=$STORE_DIR v2-update ',2023-11-23T20:05:40Z'
cabal --store-dir=$STORE_DIR v2-update ',2023-12-10T10:34:46Z'
# Install stack2cabal if it can't be found.
if ! stack2cabal --help &> /dev/null
echo "stack2cabal could not be found"
cabal --store-dir=$STORE_DIR v2-install --index-state="2023-11-23T20:05:40Z" stack2cabal-1.0.14 --overwrite-policy=always
cabal --store-dir=$STORE_DIR v2-install --index-state="2023-12-10T10:34:46Z" stack2cabal-1.0.14 --overwrite-policy=always
stack2cabal --no-run-hpack -p '2023-11-23 20:05:40'
-- Generated by stack2cabal
index-state: 2023-12-04T09:05:40Z
index-state: 2023-12-10T10:34:46Z
with-compiler: ghc-8.10.7
......@@ -109,7 +109,7 @@ source-repository-package
type: git
tag: 234ad423fa682307ff4843ae4acd725dcc6ffc55
tag: 300764df4f78ea6175535f9b78b884cc2aa9da61
type: git
......@@ -2643,7 +2643,7 @@ constraints: any.AC-Angle ==1.0,
any.vinyl ==0.13.3,
any.void ==0.7.3,
any.vty ==5.33,
any.wai ==3.2.3,
any.wai ==3.2.4,
any.wai-app-static ==,
any.wai-conduit ==,
any.wai-cors ==0.2.7,
......@@ -62,6 +62,7 @@ library
......@@ -409,6 +410,7 @@ library
, accelerate-utility ^>=
, aeson ^>=
, aeson-pretty ^>= 0.8.9
, ansi-terminal
, array ^>=
, async ^>= 2.2.4
, attoparsec ^>=
......@@ -479,6 +481,7 @@ library
, jose ^>= 0.8.4
, json-stream ^>=
, lens ^>= 4.19.2
, lens-aeson < 1.3
, lifted-base ^>=
, listsafe ^>=
, located-base ^>=
......@@ -566,7 +569,7 @@ library
, uuid ^>= 1.3.15
, validity ^>=
, vector ^>=
, wai ^>= 3.2.3
, wai >= 3.2.4
, wai-app-static ^>=
, wai-cors ^>= 0.2.7
, wai-extra ^>= 3.1.8
......@@ -45,6 +45,7 @@ import Gargantext.API.Admin.EnvTypes (Env, Mode(..))
import Gargantext.API.Admin.Settings (newEnv)
import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings)
import Gargantext.API.EKG
import Gargantext.API.Middleware (logStdoutDevSanitised)
import Gargantext.API.Ngrams (saveNodeStoryImmediate)
import Gargantext.API.Routes
import Gargantext.API.Server (server)
......@@ -58,7 +59,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings)
import Network.Wai.Middleware.Cors
import Network.Wai.Middleware.RequestLogger
import Paths_gargantext (getDataDir)
import Servant
import Servant hiding (Header)
import System.Cron.Schedule qualified as Cron
import System.FilePath
......@@ -210,7 +211,10 @@ makeDevMiddleware mode = do
--pure (warpS, logWare . checkOriginAndHost . corsMiddleware)
case mode of
Prod -> pure $ logStdout . corsMiddleware
_ -> pure $ logStdoutDev . corsMiddleware
_ -> do
loggerMiddleware <- logStdoutDevSanitised
pure $ loggerMiddleware . corsMiddleware
-- | API Global
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{-|
Request-logging middleware for development mode that redacts secrets
before they reach the log output.

Edit 'sensitiveKeywords' to extend the list of redacted fields.
-}
module Gargantext.API.Middleware (
    -- * Sanitising request logger
    logStdoutDevSanitised
  ) where
import Control.Lens
import Control.Monad.Logger
import Data.Aeson qualified as A
import Data.Aeson.Lens qualified as L
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Char8 qualified as C8
import Data.CaseInsensitive qualified as CI
import Data.List qualified as L
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Network.HTTP.Types
import Network.HTTP.Types.Header
import Network.Wai
import Network.Wai.Middleware.RequestLogger
import Prelude
import System.Console.ANSI
-- | A development request logger in the spirit of 'logStdoutDev' from
-- \"wai-extra\". Every log entry is rendered by 'customOutput', so payloads
-- which might carry sensitive information are redacted (or omitted
-- altogether) before being printed.
logStdoutDevSanitised :: IO Middleware
logStdoutDevSanitised = mkRequestLogger sanitisedSettings
  where
    -- The default logger settings, with only the output format swapped for
    -- the sanitising formatter.
    sanitisedSettings =
      defaultRequestLoggerSettings
        { outputFormat = CustomOutputFormatWithDetailsAndHeaders customOutput
        }
-- |
-- A variant of 'key' built on 'at' rather than 'ix', which makes it possible
-- to insert or delete a key of a JSON object:
--
-- >>> "{\"a\": 100, \"b\": 200}" & atKey "a" .~ Nothing
-- "{\"b\":200}"
--
-- >>> "{\"a\": 100, \"b\": 200}" & atKey "c" ?~ String "300"
-- "{\"a\":100,\"b\":200,\"c\":\"300\"}"
atKey :: L.AsValue t => T.Text -> Traversal' t (Maybe A.Value)
atKey fieldName = L._Object . at fieldName
{-# INLINE atKey #-}
-- | Output formatter used by 'logStdoutDevSanitised': for each request it
-- renders the request details followed by the response details.
--
-- Sanitisation happens before anything is rendered. The view patterns in the
-- argument list concatenate the body chunks and run the result through
-- 'sanitiseBody', and map 'sanitiseHeader' over the header list; the query
-- string of the request is mapped through 'sanitiseQueryItem'. The timestamp,
-- the response size and the raw response arguments are ignored.
--
-- NOTE(review): 'mkRequestLog' and 'mkResponseLog' below refer to @rq@,
-- @headers@, @status@ and @request_dur@, so they are presumably local
-- definitions of 'customOutput' (a @where@ clause); the keyword and the
-- indentation are not visible in this view — confirm against the repository.
--
-- NOTE(review): the header list handed to this formatter type is presumably
-- the response's (see the wai-extra documentation) even though it is printed
-- as part of the request section — confirm.
customOutput :: OutputFormatterWithDetailsAndHeaders
customOutput _zonedDate rq status _mb_response_size request_dur (sanitiseBody . mconcat -> reqbody) _raw_response (map sanitiseHeader -> headers) =
let params = map sanitiseQueryItem (queryString rq)
in mkRequestLog params reqbody <> mkResponseLog
-- Request section: coloured HTTP method, raw path, query parameters, body
-- and finally one line per header.
mkRequestLog :: [QueryItem] -> ByteString -> LogStr
mkRequestLog params bdy =
foldMap toLogStr (ansiMethod' (requestMethod rq))
<> " "
<> toLogStr (rawPathInfo rq)
<> "\n"
-- Each (already sanitised) query item is rendered with 'show'; no separator
-- is emitted between the items or before the body.
<> foldMap (\(k, mb_v) -> toLogStr $ show (k, mb_v)) params
<> toLogStr bdy
<> "\n"
-- Headers are printed in white as "name: value", using the original casing
-- of the header name.
<> foldMap (\(k, v) -> toLogStr $ mconcat $ ansiColor' White $ " " <> CI.original k <> ": " <> v <> "\n") headers
<> "\n"
-- Response section: the status code and message (coloured by
-- 'ansiStatusCode'') followed by the time taken to serve the request.
mkResponseLog :: LogStr
mkResponseLog =
foldMap toLogStr (ansiColor' White " Status: ")
<> foldMap toLogStr (ansiStatusCode' status (C8.pack (show $ statusCode status) <> " " <> statusMessage status))
<> " "
<> "Served in " <> toLogStr (C8.pack $ show $ request_dur)
<> "\n"
-- | Sanitises a raw request body: for every entry of 'sensitiveKeywords' the
-- value stored under that key of the top-level JSON object (if any) is passed
-- through 'updateField'.
sanitiseBody :: ByteString -> ByteString
sanitiseBody blob = L.foldr redactKey blob sensitiveKeywords
  where
    -- Rewrites the value found under a single keyword, leaving the rest of
    -- the body as it is.
    redactKey keyword = over (atKey keyword) (updateField keyword)
-- | Redacts the value stored under a sensitive JSON key.
--
-- * A missing key ('Nothing') stays missing, so no field is ever added to
--   the logged body.
--
-- * For a key listed in 'sensitiveKeywords' every non-null value is replaced
--   by the string @\"*****\"@. This deliberately covers numbers, booleans,
--   arrays and objects as well as strings: a numeric PIN or a nested
--   credentials object is just as sensitive as a textual password and must
--   not reach the logs. @null@ is kept, as it carries no secret.
--
-- * For any other key the value is returned unchanged.
updateField :: T.Text -> Maybe A.Value -> Maybe A.Value
updateField k mbValue
  | k `elem` sensitiveKeywords = redact <$> mbValue
  | otherwise                  = mbValue
  where
    redact :: A.Value -> A.Value
    redact A.Null = A.Null
    redact _      = A.String "*****"
-- | Redacts the value of a query-string parameter whose name is listed in
-- 'sensitiveKeywords'; any other parameter is returned untouched.
--
-- A non-empty value becomes @\"*****\"@, while an empty or absent value is
-- left empty\/absent.
--
-- The comparison is done on raw bytes (the keywords are UTF-8 encoded)
-- rather than by decoding the parameter name: 'TE.decodeUtf8' throws on
-- invalid UTF-8, and parameter names come straight from the client, so a
-- malformed name must not be able to make the logger fail.
sanitiseQueryItem :: QueryItem -> QueryItem
sanitiseQueryItem (k, mb_v)
  | k `elem` sensitiveKeys = (k, redact <$> mb_v)
  | otherwise              = (k, mb_v)
  where
    sensitiveKeys :: [ByteString]
    sensitiveKeys = map TE.encodeUtf8 sensitiveKeywords

    redact :: ByteString -> ByteString
    redact v
      | C8.null v = mempty
      | otherwise = "*****"
-- | Names of the JSON body fields and query-string parameters whose values
-- are redacted before a request is logged. The list is consulted by
-- 'sanitiseBody', 'updateField' and 'sanitiseQueryItem'; names are compared
-- for exact equality, so matching is case-sensitive.
--
-- /NOTE:/ Extend this list to filter for more sensitive keywords.
--
-- NOTE(review): as shown here the list opens with a leading comma and has no
-- closing bracket, so an entry and the final @]@ appear to be missing from
-- this view — confirm against the repository.
sensitiveKeywords :: [T.Text]
sensitiveKeywords = [
, "api_key"
, "apiKey"
, "pubmedAPIKey"
-- | Masks the content of headers that carry credentials or session data
-- (@Authorization@, @Cookie@ and @Set-Cookie@) with @\"*****\"@. Every other
-- header is returned as it was given.
sanitiseHeader :: Header -> Header
sanitiseHeader header@(hName, _)
  | hName `elem` redactedHeaders = (hName, "*****")
  | otherwise                    = header
  where
    redactedHeaders = [hAuthorization, hCookie, hSetCookie]
-- | Wraps a 'BS.ByteString' in ANSI escape codes: the SGR code selecting the
-- given colour as a dull foreground, then the text itself, then the SGR
-- reset code. The three pieces are returned as separate list elements.
--
-- NOTE(review): the closing @]@ of the list is not visible in this view —
-- confirm against the repository.
ansiColor' :: Color -> BS.ByteString -> [BS.ByteString]
ansiColor' color bs =
[ C8.pack $ setSGRCode [SetColor Foreground Dull color]
, bs
, C8.pack $ setSGRCode [Reset]
-- | Colours the name of an HTTP method: cyan for @GET@ and @HEAD@, green for
-- @PUT@, yellow for @POST@, red for @DELETE@ and magenta for anything else.
ansiMethod' :: BS.ByteString -> [BS.ByteString]
ansiMethod' m = ansiColor' (methodColor m) m
  where
    methodColor :: BS.ByteString -> Color
    methodColor "GET"    = Cyan
    methodColor "HEAD"   = Cyan
    methodColor "PUT"    = Green
    methodColor "POST"   = Yellow
    methodColor "DELETE" = Red
    methodColor _        = Magenta
-- | Colours the given text according to the first digit of the HTTP status
-- code: green for 2xx, yellow for 3xx, red for 4xx, magenta for 5xx and blue
-- for everything else.
ansiStatusCode' :: Status -> ByteString -> [BS.ByteString]
ansiStatusCode' (Status c _) t = ansiColor' statusColor t
  where
    -- Only the leading character of the code's decimal rendering matters.
    statusColor :: Color
    statusColor = case take 1 (show c) of
      "2" -> Green
      "3" -> Yellow
      "4" -> Red
      "5" -> Magenta
      _   -> Blue
......@@ -97,7 +97,9 @@ get apiKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getRawQuery q
, perPage = Just 200
, mWebEnv = Nothing })
, mWebEnv = Nothing
, enableDebugLogs = False
let takeLimit = case l of
Nothing -> mapC identity
Just l' -> takeC $ getLimit l'
......@@ -54,7 +54,7 @@ extra-deps:
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors
- git:
commit: 234ad423fa682307ff4843ae4acd725dcc6ffc55
commit: 300764df4f78ea6175535f9b78b884cc2aa9da61
- git:
commit: 9b1bd17f3ed38eab83e675bb68278922217a9c73
- git:
......@@ -147,6 +147,7 @@ extra-deps:
- tasty-hspec-
- tmp-postgres-
- vector-,7953
- wai-3.2.4
# For the graph clustering
