Commit f53b7e45 authored by Alfredo Di Napoli

Sanitise logged requests

This commit introduces a slight variation of the `logStdoutDev`
middleware which is capable of redacting secrets from our gargantext
requests.

This way we can run the server in dev mode (for instrumentation
purposes) without leaking users' secrets.
parent 3ce0e6a0
Pipeline #5496 failed with stages
in 17 minutes and 57 seconds
......@@ -14,13 +14,13 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
expected_cabal_project_hash="69e03370a602f40243373515ff884a2cd50dc02eb6f52cd23ba9016a61fe8069"
expected_cabal_project_freeze_hash="796f0109611f3381278b1885ae1fa257c4177b99885eb04701938f1107c06ee5"
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-11-23T20:05:40Z'
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-12-10T10:34:46Z'
# Install stack2cabal if it can't be found.
if ! stack2cabal --help &> /dev/null
then
echo "stack2cabal could not be found"
cabal --store-dir=$STORE_DIR v2-install --index-state="2023-11-23T20:05:40Z" stack2cabal-1.0.14 --overwrite-policy=always
cabal --store-dir=$STORE_DIR v2-install --index-state="2023-12-10T10:34:46Z" stack2cabal-1.0.14 --overwrite-policy=always
fi
stack2cabal --no-run-hpack -p '2023-11-23 20:05:40'
......
-- Generated by stack2cabal
index-state: 2023-12-04T09:05:40Z
index-state: 2023-12-10T10:34:46Z
with-compiler: ghc-8.10.7
......@@ -109,7 +109,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
tag: 234ad423fa682307ff4843ae4acd725dcc6ffc55
tag: 300764df4f78ea6175535f9b78b884cc2aa9da61
source-repository-package
type: git
......
......@@ -2643,7 +2643,7 @@ constraints: any.AC-Angle ==1.0,
any.vinyl ==0.13.3,
any.void ==0.7.3,
any.vty ==5.33,
any.wai ==3.2.3,
any.wai ==3.2.4,
any.wai-app-static ==3.1.7.3,
any.wai-conduit ==3.0.0.4,
any.wai-cors ==0.2.7,
......
......@@ -62,6 +62,7 @@ library
Gargantext.API.Errors.Types
Gargantext.API.Errors.Types.Backend
Gargantext.API.HashedResponse
Gargantext.API.Middleware
Gargantext.API.Ngrams
Gargantext.API.Ngrams.List
Gargantext.API.Ngrams.List.Types
......@@ -409,6 +410,7 @@ library
, accelerate-utility ^>= 1.0.0.1
, aeson ^>= 1.5.6.0
, aeson-pretty ^>= 0.8.9
, ansi-terminal
, array ^>= 0.5.4.0
, async ^>= 2.2.4
, attoparsec ^>= 0.13.2.5
......@@ -479,6 +481,7 @@ library
, jose ^>= 0.8.4
, json-stream ^>= 0.4.2.4
, lens ^>= 4.19.2
, lens-aeson < 1.3
, lifted-base ^>= 0.2.3.12
, listsafe ^>= 0.1.0.1
, located-base ^>= 0.1.1.1
......@@ -566,7 +569,7 @@ library
, uuid ^>= 1.3.15
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai ^>= 3.2.3
, wai >= 3.2.4
, wai-app-static ^>= 3.1.7.3
, wai-cors ^>= 0.2.7
, wai-extra ^>= 3.1.8
......
......@@ -45,6 +45,7 @@ import Gargantext.API.Admin.EnvTypes (Env, Mode(..))
import Gargantext.API.Admin.Settings (newEnv)
import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings)
import Gargantext.API.EKG
import Gargantext.API.Middleware (logStdoutDevSanitised)
import Gargantext.API.Ngrams (saveNodeStoryImmediate)
import Gargantext.API.Routes
import Gargantext.API.Server (server)
......@@ -58,7 +59,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings)
import Network.Wai.Middleware.Cors
import Network.Wai.Middleware.RequestLogger
import Paths_gargantext (getDataDir)
import Servant
import Servant hiding (Header)
import System.Cron.Schedule qualified as Cron
import System.FilePath
......@@ -210,7 +211,10 @@ makeDevMiddleware mode = do
--pure (warpS, logWare . checkOriginAndHost . corsMiddleware)
case mode of
Prod -> pure $ logStdout . corsMiddleware
_ -> pure $ logStdoutDev . corsMiddleware
_ -> do
loggerMiddleware <- logStdoutDevSanitised
pure $ loggerMiddleware . corsMiddleware
---------------------------------------------------------------------
-- | API Global
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
{-| Edit 'sensitiveKeywords' to extend the list of redacted fields. -}
module Gargantext.API.Middleware (
logStdoutDevSanitised
) where
import Control.Lens
import Control.Monad.Logger
import Data.Aeson qualified as A
import Data.Aeson.Lens qualified as L
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.Char8 qualified as C8
import Data.CaseInsensitive qualified as CI
import Data.List qualified as L
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Network.HTTP.Types
import Network.HTTP.Types.Header
import Network.Wai
import Network.Wai.Middleware.RequestLogger
import Prelude
import System.Console.ANSI
-- | Build a request-logging 'Middleware' in the spirit of 'logStdoutDev'
-- from \"wai-extra\", except that every log entry is rendered by
-- 'customOutput', which masks sensitive values before they are printed.
--
-- All other logger settings are taken from 'defaultRequestLoggerSettings'.
logStdoutDevSanitised :: IO Middleware
logStdoutDevSanitised = mkRequestLogger sanitisedSettings
  where
    -- Only the output format is overridden.
    sanitisedSettings =
      defaultRequestLoggerSettings
        { outputFormat = CustomOutputFormatWithDetailsAndHeaders customOutput
        }
-- | Focus on the value stored under a given key of a JSON object, as a
-- 'Maybe'. Unlike 'L.key' (which is built on 'ix'), this is built on 'at',
-- so the key can be inserted or deleted as well as modified:
--
-- >>> "{\"a\": 100, \"b\": 200}" & atKey "a" .~ Nothing
-- "{\"b\":200}"
--
-- >>> "{\"a\": 100, \"b\": 200}" & atKey "c" ?~ String "300"
-- "{\"a\":100,\"b\":200,\"c\":\"300\"}"
--
-- The traversal has no targets when the input is not a JSON object.
atKey :: L.AsValue t => T.Text -> Traversal' t (Maybe A.Value)
atKey fieldName = L._Object . at fieldName
{-# INLINE atKey #-}
-- | Log formatter used by 'logStdoutDevSanitised'.
--
-- For each request it emits, in order:
--
--   1. the coloured HTTP method and the raw request path;
--   2. the query parameters, each passed through 'sanitiseQueryItem';
--   3. the request body, passed through 'sanitiseBody';
--   4. the headers handed over by the logger, each passed through
--      'sanitiseHeader' (presumably the response headers -- confirm against
--      the \"wai-extra\" documentation);
--   5. the coloured status line followed by the time taken to serve.
--
-- The timestamp, the response size and the raw response are ignored.
customOutput :: OutputFormatterWithDetailsAndHeaders
customOutput _zonedDate rq status _mb_response_size request_dur bodyChunks _raw_response rawHeaders =
  mconcat
    [ requestLine
    , queryLog
    , toLogStr safeBody
    , "\n"
    , headerLog
    , "\n"
    , statusLog
    , " "
    , "Served in "
    , toLogStr (C8.pack (show request_dur))
    , "\n"
    ]
  where
    -- The body arrives in chunks; glue them together before redacting.
    safeBody :: ByteString
    safeBody = sanitiseBody (mconcat bodyChunks)

    requestLine :: LogStr
    requestLine =
      mconcat
        [ foldMap toLogStr (ansiMethod' (requestMethod rq))
        , " "
        , toLogStr (rawPathInfo rq)
        , "\n"
        ]

    -- Query items are rendered with 'show', one after the other.
    queryLog :: LogStr
    queryLog = foldMap (toLogStr . show . sanitiseQueryItem) (queryString rq)

    headerLog :: LogStr
    headerLog = foldMap (headerLine . sanitiseHeader) rawHeaders

    headerLine :: Header -> LogStr
    headerLine (name, value) =
      toLogStr (mconcat (ansiColor' White (mconcat [" ", CI.original name, ": ", value, "\n"])))

    statusLog :: LogStr
    statusLog =
      foldMap toLogStr (ansiColor' White " Status: ")
        <> foldMap toLogStr (ansiStatusCode' status statusText)

    statusText :: ByteString
    statusText = mconcat [C8.pack (show (statusCode status)), " ", statusMessage status]
-- | Redact sensitive string fields from a JSON request body.
--
-- The body is decoded as JSON and every object in the document -- at any
-- nesting depth, including objects inside arrays -- has the fields named in
-- 'sensitiveKeywords' replaced by @\"*****\"@, provided the field holds a
-- JSON string. Non-string values under those keys are left as they are.
--
-- Previously only the keys of the top-level object were inspected, so a
-- secret nested in a sub-object was logged in clear.
--
-- A body that is not valid JSON is returned unchanged.
-- NOTE(review): this means secrets in non-JSON payloads (e.g. url-encoded
-- forms) are not redacted here -- confirm whether such payloads can occur.
--
-- A body that is valid JSON is re-encoded, so the logged bytes may differ
-- from the received ones in insignificant whitespace and key order.
sanitiseBody :: ByteString -> ByteString
sanitiseBody = over L._Value (transform redactKeys)
  where
    -- Applied by 'transform' to every node of the document; 'atKey' has no
    -- targets on nodes that are not objects, so those pass through untouched.
    redactKeys :: A.Value -> A.Value
    redactKeys v =
      L.foldr (\k acc -> over (atKey k) (fmap redactString) acc) v sensitiveKeywords

    -- Only string payloads are masked.
    redactString :: A.Value -> A.Value
    redactString (A.String _) = A.String "*****"
    redactString other = other
-- | Redact the value of a query-string parameter whose key is listed in
-- 'sensitiveKeywords'.
--
-- A present, non-empty value becomes @\"*****\"@; an empty value stays
-- empty and a missing value stays missing. Parameters with any other key
-- are returned unchanged.
--
-- The key is compared as raw bytes against the UTF-8 encoding of each
-- keyword rather than being decoded first: query keys come from the client
-- and 'TE.decodeUtf8' throws on invalid UTF-8, which would make the logger
-- raise on a malformed request.
sanitiseQueryItem :: QueryItem -> QueryItem
sanitiseQueryItem item@(k, mb_v)
  | k `elem` map TE.encodeUtf8 sensitiveKeywords = (k, redact <$> mb_v)
  | otherwise = item
  where
    redact :: ByteString -> ByteString
    redact v
      | C8.null v = mempty
      | otherwise = "*****"
-- | Names of the JSON fields and query-string parameters whose values must
-- never reach the logs. Keys are compared for exact equality.
--
-- /NOTE:/ Extend this list to filter for more sensitive keywords.
sensitiveKeywords :: [T.Text]
sensitiveKeywords =
  [ "password"
  , "api_key"
  , "apiKey"
  , "pubmedAPIKey"
  ]
-- | Mask the content of headers that carry credentials.
--
-- The @Authorization@, @Cookie@ and @Set-Cookie@ headers have their content
-- replaced by @\"*****\"@; every other header is returned untouched.
sanitiseHeader :: Header -> Header
sanitiseHeader header@(hName, _)
  | hName `elem` secretHeaders = (hName, "*****")
  | otherwise = header
  where
    secretHeaders = [hAuthorization, hCookie, hSetCookie]
-- | Wrap a chunk of bytes in ANSI escape codes: a dull foreground colour
-- before it and a reset after it. The three pieces are returned as separate
-- list elements, in output order.
ansiColor' :: Color -> BS.ByteString -> [BS.ByteString]
ansiColor' color bs = [sgr [SetColor Foreground Dull color], bs, sgr [Reset]]
  where
    -- Render a list of SGR commands as the corresponding escape sequence.
    sgr = C8.pack . setSGRCode
-- | Colour an HTTP method name: cyan for @GET@ and @HEAD@, green for @PUT@,
-- yellow for @POST@, red for @DELETE@ and magenta for anything else.
ansiMethod' :: BS.ByteString -> [BS.ByteString]
ansiMethod' m = ansiColor' methodColor m
  where
    methodColor
      | m `elem` ["GET", "HEAD"] = Cyan
      | m == "PUT" = Green
      | m == "POST" = Yellow
      | m == "DELETE" = Red
      | otherwise = Magenta
-- | Colour a piece of text according to the leading digit of the status
-- code: green for 2xx, yellow for 3xx, red for 4xx, magenta for 5xx and
-- blue for everything else.
ansiStatusCode' :: Status -> ByteString -> [BS.ByteString]
ansiStatusCode' status t = ansiColor' (classColor (show (statusCode status))) t
  where
    -- Dispatch on the first character of the rendered status code.
    classColor :: String -> Color
    classColor ('2' : _) = Green
    classColor ('3' : _) = Yellow
    classColor ('4' : _) = Red
    classColor ('5' : _) = Magenta
    classColor _ = Blue
......@@ -97,7 +97,9 @@ get apiKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getRawQuery q
, perPage = Just 200
, mWebEnv = Nothing })
, mWebEnv = Nothing
, enableDebugLogs = False
})
let takeLimit = case l of
Nothing -> mapC identity
Just l' -> takeC $ getLimit l'
......
......@@ -54,7 +54,7 @@ extra-deps:
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 234ad423fa682307ff4843ae4acd725dcc6ffc55
commit: 300764df4f78ea6175535f9b78b884cc2aa9da61
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: 9b1bd17f3ed38eab83e675bb68278922217a9c73
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......@@ -147,6 +147,7 @@ extra-deps:
- tasty-hspec-1.2.0.3
- tmp-postgres-1.34.1.0
- vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
- wai-3.2.4
# For the graph clustering
ghc-options:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment