Mon, 11 Feb
Does my httpd escape the tab character when I use LTSV?

Labeled Tab-separated Values (LTSV) is a neat format for logging. It’s extensible and easy to use from Apache and Nginx.

FAQ

Aren’t there cases when tabs appear in the strings like User-Agent? No, there aren’t, because of vulnerability management.

Vulnerability? Why?

This FAQ is translated from Japanese version which has a link to LTSV’s another benefit, or what you should take care when logging (Japanese). In this article, the author mentioned about Apache Error Log Escape Sequence Injection Vulnerability. After this vulnerability, Apache escapes \t and another control characters.

Apache

The below log_header_in function (declared in httpd-2.4.3/modules/loggers/mod_log_config.c) returns an escaped value from request headers.

static const char *log_header_in(request_rec *r, char *a)
{
    return ap_escape_logitem(r->pool, apr_table_get(r->headers_in, a));
}

...

static int log_pre_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp)
{
    static APR_OPTIONAL_FN_TYPE(ap_register_log_handler) *log_pfn_register;

    log_pfn_register = APR_RETRIEVE_OPTIONAL_FN(ap_register_log_handler);

    if (log_pfn_register) {
        log_pfn_register(p, "h", log_remote_host, 0);
        log_pfn_register(p, "a", log_remote_address, 0 );
        log_pfn_register(p, "A", log_local_address, 0 );
        log_pfn_register(p, "l", log_remote_logname, 0);
        log_pfn_register(p, "u", log_remote_user, 0);
        log_pfn_register(p, "t", log_request_time, 0);
        log_pfn_register(p, "f", log_request_file, 0);
        log_pfn_register(p, "b", clf_log_bytes_sent, 0);
        log_pfn_register(p, "B", log_bytes_sent, 0);
        log_pfn_register(p, "i", log_header_in, 0);
        log_pfn_register(p, "o", log_header_out, 0);

The ap_escape_logitem is defined in httpd-2.4.3/server/util.c.

AP_DECLARE(char *) ap_escape_logitem(apr_pool_t *p, const char *str)
{
    char *ret;
    unsigned char *d;
    const unsigned char *s;

    if (!str) {
        return NULL;
    }

    ret = apr_palloc(p, 4 * strlen(str) + 1); /* Be safe */
    d = (unsigned char *)ret;
    s = (const unsigned char *)str;
    for (; *s; ++s) {

        if (TEST_CHAR(*s, T_ESCAPE_LOGITEM)) {
            *d++ = '\\';
            switch(*s) {
            case '\b':
                *d++ = 'b';
                break;
            case '\n':
                *d++ = 'n';
                break;
            case '\r':
                *d++ = 'r';
                break;
            case '\t':
                *d++ = 't';
                break;
            case '\v':
                *d++ = 'v';
                break;
            case '\\':
            case '"':
                *d++ = *s;
                break;
            default:
                c2x(*s, 'x', d);
                d += 3;
            }
        }
        else {
            *d++ = *s;
        }
    }
    *d = '\0';

    return ret;
}

Nginx

However, is it true in Nginx? Yes.

Nginx doesn’t have a sprintf-like formatting string. Everything is a variable which tied to ngx_http_request_t. The below ngx_http_log_variable (declared in nginx-1.2.6/src/http/modules/ngx_http_log_module.c) function returns an escaped value.

static size_t
ngx_http_log_variable_getlen(ngx_http_request_t *r, uintptr_t data)
{
    uintptr_t                   len;
    ngx_http_variable_value_t  *value;

    value = ngx_http_get_indexed_variable(r, data);

    if (value == NULL || value->not_found) {
        return 1;
    }

    len = ngx_http_log_escape(NULL, value->data, value->len);

    value->escape = len ? 1 : 0;

    return value->len + len * 3;
}


static u_char *
ngx_http_log_variable(ngx_http_request_t *r, u_char *buf, ngx_http_log_op_t *op)
{
    ngx_http_variable_value_t  *value;

    value = ngx_http_get_indexed_variable(r, op->data);

    if (value == NULL || value->not_found) {
        *buf = '-';
        return buf + 1;
    }

    if (value->escape == 0) {
        return ngx_cpymem(buf, value->data, value->len);

    } else {
        return (u_char *) ngx_http_log_escape(buf, value->data, value->len);
    }
}


static uintptr_t
ngx_http_log_escape(u_char *dst, u_char *src, size_t size)
{
    ngx_uint_t      n;
    static u_char   hex[] = "0123456789ABCDEF";

    static uint32_t   escape[] = {
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */

                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
        0x00000004, /* 0000 0000 0000 0000  0000 0000 0000 0100 */

                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
        0x10000000, /* 0001 0000 0000 0000  0000 0000 0000 0000 */

                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */

        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
    };


    if (dst == NULL) {

        /* find the number of the characters to be escaped */

        n = 0;

        while (size) {
            if (escape[*src >> 5] & (1 << (*src & 0x1f))) {
                n++;
            }
            src++;
            size--;
        }

        return (uintptr_t) n;
    }

    while (size) {
        if (escape[*src >> 5] & (1 << (*src & 0x1f))) {
            *dst++ = '\\';
            *dst++ = 'x';
            *dst++ = hex[*src >> 4];
            *dst++ = hex[*src & 0xf];
            src++;

        } else {
            *dst++ = *src++;
        }
        size--;
    }

    return (uintptr_t) dst;
}

The $http_user_agent (and other variables) is declared in nginx-1.2.6/src/http/ngx_http_variables.c.

static ngx_http_variable_t  ngx_http_core_variables[] = {

    { ngx_string("http_host"), NULL, ngx_http_variable_header,
      offsetof(ngx_http_request_t, headers_in.host), 0, 0 },

    { ngx_string("http_user_agent"), NULL, ngx_http_variable_header,
      offsetof(ngx_http_request_t, headers_in.user_agent), 0, 0 },

Lighttpd

Lighttpd also escapes control characters. The below log_access_write calls accesslog_append_escaped (both are declared in lighttpd-1.4.32/src/mod_accesslog.c).

static const format_mapping fmap[] =
{
    { '%', FORMAT_PERCENT },
    ...
    { 'i', FORMAT_HEADER },

    { 'a', FORMAT_REMOTE_ADDR },
    ...
    { 'O', FORMAT_BYTES_OUT },

    { 'o', FORMAT_RESPONSE_HEADER },

    { '\0', FORMAT_UNSET }
};

...

static void accesslog_append_escaped(buffer *dest, buffer *str) {
    char *ptr, *start, *end;

    /* replaces non-printable chars with \xHH where HH is the hex representation of the byte */
    /* exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
    if (str->used == 0) return;
    buffer_prepare_append(dest, str->used - 1);

    for (ptr = start = str->ptr, end = str->ptr + str->used - 1; ptr < end; ptr++) {
        char const c = *ptr;
        if (c >= ' ' && c <= '~' && c != '"' && c != '\\') {
            /* nothing to change, add later as one block */
        } else {
            /* copy previous part */
            if (start < ptr) {
                buffer_append_string_len(dest, start, ptr - start);
            }
            start = ptr + 1;

            switch (c) {
            case '"':
                BUFFER_APPEND_STRING_CONST(dest, "\\\"");
                break;
            case '\\':
                BUFFER_APPEND_STRING_CONST(dest, "\\\\");
                break;
            case '\b':
                BUFFER_APPEND_STRING_CONST(dest, "\\b");
                break;
            case '\n':
                BUFFER_APPEND_STRING_CONST(dest, "\\n");
                break;
            case '\r':
                BUFFER_APPEND_STRING_CONST(dest, "\\r");
                break;
            case '\t':
                BUFFER_APPEND_STRING_CONST(dest, "\\t");
                break;
            case '\v':
                BUFFER_APPEND_STRING_CONST(dest, "\\v");
                break;
            default: {
                    /* non printable char => \xHH */
                    char hh[5] = {'\\','x',0,0,0};
                    char h = c / 16;
                    hh[2] = (h > 9) ? (h - 10 + 'A') : (h + '0');
                    h = c % 16;
                    hh[3] = (h > 9) ? (h - 10 + 'A') : (h + '0');
                    buffer_append_string_len(dest, &hh[0], 4);
                }
                break;
            }
        }
    }

    if (start < end) {
        buffer_append_string_len(dest, start, end - start);
    }
}

...

REQUESTDONE_FUNC(log_access_write) {
    plugin_data *p = p_d;
    buffer *b;
    size_t j;

    int newts = 0;
    data_string *ds;

    mod_accesslog_patch_connection(srv, con, p);

    /* No output device, nothing to do */
    if (!p->conf.use_syslog && p->conf.log_access_fd == -1) return HANDLER_GO_ON;

    b = p->conf.access_logbuffer;
    if (b->used == 0) {
        buffer_copy_string_len(b, CONST_STR_LEN(""));
    }

    for (j = 0; j < p->conf.parsed_format->used; j++) {
        switch(p->conf.parsed_format->ptr[j]->type) {
        case FIELD_STRING:
            buffer_append_string_buffer(b, p->conf.parsed_format->ptr[j]->string);
            break;
        case FIELD_FORMAT:
            switch(p->conf.parsed_format->ptr[j]->field) {
            ...
            case FORMAT_RESPONSE_HEADER:
                if (NULL != (ds = (data_string *)array_get_element(con->response.headers, p->conf.parsed_format->ptr[j]->string->ptr))) {
                    accesslog_append_escaped(b, ds->value);
                } else {
                    buffer_append_string_len(b, CONST_STR_LEN("-"));
                }
                break;

Conclusion

At least, Apache, Nginx and Lighttpd escape the tab character. There is a little deference between servers. Apache and Lighttpd use “\t” but Nginx uses “\x09”

Also these servers escape ‘”’ even the character is printable. Probably you can logging in JSON safely if you prefer to output more characters!