From 8cda7475e6b456636f61c48c2132ecf32f4c23b1 Mon Sep 17 00:00:00 2001
From: Andrew Hayworth <andrew.hayworth@getbraintree.com>
Date: Tue, 7 Apr 2015 21:42:53 +0000
Subject: [PATCH] Add a new log format variable "%p" that spits out the
 sanitized request path

It's often undesirable to log query params - and in some cases, it can
create legal compliance problems. This commit adds a new log format
variable that logs the HTTP verb and the path requested sans query
string (and additionally omitting the protocol). For example, the
following HTTP request line:

  GET /foo?bar=baz HTTP/1.1

becomes:

  GET /foo

with this log format variable.
---
 include/types/log.h |  1 +
 src/log.c           | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/include/types/log.h b/include/types/log.h
index c7e47ea..3205ce6 100644
--- a/include/types/log.h
+++ b/include/types/log.h
@@ -90,6 +90,7 @@ enum {
 	LOG_FMT_HDRREQUESTLIST,
 	LOG_FMT_HDRRESPONSLIST,
 	LOG_FMT_REQ,
+	LOG_FMT_PATH,
 	LOG_FMT_HOSTNAME,
 	LOG_FMT_UNIQUEID,
 	LOG_FMT_SSL_CIPHER,
diff --git a/src/log.c b/src/log.c
index 1a5ad25..af89e00 100644
--- a/src/log.c
+++ b/src/log.c
@@ -108,6 +108,7 @@ static const struct logformat_type logformat_keywords[] = {
 	{ "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response */
 	{ "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL },  /* header response list */
 	{ "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL },       /* accept date millisecond */
+	{ "p", LOG_FMT_PATH, PR_MODE_HTTP, LW_REQ, NULL },  /* path */
 	{ "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
 	{ "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL },  /* request */
 	{ "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL },  /* retries */
@@ -1539,6 +1540,29 @@ int build_logline(struct session *s, char *dst, size_t maxsize, struct list *lis
 				last_isspace = 0;
 				break;
 
+			case LOG_FMT_PATH: // %p
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				uri = txn->uri ? txn->uri : "<BADREQ>";
+				ret = encode_string(tmplog, dst + maxsize,
+						       '#', url_encode_map, uri);
+				if (ret == NULL || *ret != '\0')
+					goto out;
+
+				// Cut off request line at first occurrence of '?' which signals the beginning of
+				// request params (and end of path). If no params are present, cut off at last space
+				// which otherwise signals the end of the path.
+				uri = strchr(tmplog, '?');
+				if (uri == NULL) {
+					uri = strrchr(tmplog, ' ');
+				}
+				tmplog = uri ? uri : ret;
+
+				if (tmp->options & LOG_OPT_QUOTE)
+					LOGCHAR('"');
+				last_isspace = 0;
+				break;
+
 			case LOG_FMT_PID: // %pid
 				if (tmp->options & LOG_OPT_HEXA) {
 					iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid);
-- 
2.1.3

