Browse Source

[mod_redirect,mod_rewrite] encoding options (fixes #443, fixes #911)

Provide means to encode redirect and rewrite backreference substitutions
  In addition to $1 and %1, the following modifiers are now supported,
  followed by the number for the backreference, e.g. ${esc:1}

  ${noesc:...}  no escaping
  ${esc:...}    escape all non-alphanumeric - . _ ~ incl double-escape %
  ${escape:...} escape all non-alphanumeric - . _ ~ incl double-escape %
  ${escnde:...} escape all non-alphanumeric - . _ ~  but no double-esc %
  ${tolower:...}
  ${toupper:...}

  %{noesc:...}
  %{esc:...}
  %{escape:...}
  %{escnde:...}
  %{tolower:...}
  %{toupper:...}

Provide means to substitute URI parts without needing a regex match
  (and can be preceded by encoding modifier,
     e.g. ${tolower:url.authority})

  ${url.scheme}
  ${url.authority}
  ${url.port}
  ${url.path}
  ${url.query}

  ${qsa}        appends query string, if not empty

x-ref:
  "[PATCH] mod_redirect: Add support for url-encoding backreferences, map %%n->%n, $$n->$n"
  https://redmine.lighttpd.net/issues/443
  "Need for URL encoding in mod_redirect and possibly mod_rewrite"
  https://redmine.lighttpd.net/issues/911
personal/stbuehler/fix-fdevent
Glenn Strauss 4 years ago
parent
commit
255269d799
  1. 147
      src/burl.c
  2. 12
      src/burl.h
  3. 175
      src/keyvalue.c
  4. 1
      src/mod_redirect.c
  5. 1
      src/mod_rewrite.c

147
src/burl.c

@ -355,3 +355,150 @@ int burl_normalize (buffer *b, buffer *t, int flags)
return qs;
}
static void burl_append_encode_nde (buffer * const b, const char * const str, const size_t len)
{
    /* Appends str[0..len) to b, percent-encoding everything except the
     * unreserved chars  - . 0-9 A-Z _ a-z ~
     * An existing %XX sequence is not double-encoded: it is copied through,
     * or decoded when XX is itself an unreserved char.
     * Note: not checking for invalid UTF-8 */
    char * const p = buffer_string_prepare_append(b, len*3); /* worst case: 3 bytes out per byte in */
    unsigned int n1, n2;
    size_t j = 0;
    for (size_t i = 0; i < len; ++i, ++j) {
        /* (len - i > 2) keeps both hex digits inside [0,len): str may be a
         * sub-string of a larger buffer (e.g. a regex match), and chars
         * following the sub-string must not be consumed as part of an escape */
        if (str[i]=='%' && len - i > 2
            && li_cton(str[i+1], n1) && li_cton(str[i+2], n2)) {
            const unsigned int x = (n1 << 4) | n2;
            if (burl_is_unreserved((int)x)) {
                p[j] = (char)x;
            }
            else { /* leave UTF-8, control chars, and required chars encoded */
                p[j] = '%';
                p[++j] = str[i+1];
                p[++j] = str[i+2];
            }
            i+=2;
        }
        else if (burl_is_unreserved(str[i])) {
            p[j] = str[i];
        }
        else {
            p[j] = '%';
            p[++j] = hex_chars_uc[(str[i] >> 4) & 0xF];
            p[++j] = hex_chars_uc[str[i] & 0xF];
        }
    }
    buffer_commit(b, j);
}
static void burl_append_encode_psnde (buffer * const b, const char * const str, const size_t len)
{
    /* Appends str[0..len) to b, percent-encoding everything except the
     * unreserved chars  - . 0-9 A-Z _ a-z ~  and the literal slash '/'
     * An existing %XX sequence is not double-encoded: it is copied through,
     * or decoded when XX is itself an unreserved char.
     * Note: not checking for invalid UTF-8 */
    char * const p = buffer_string_prepare_append(b, len*3); /* worst case: 3 bytes out per byte in */
    unsigned int n1, n2;
    size_t j = 0;
    for (size_t i = 0; i < len; ++i, ++j) {
        /* (len - i > 2) keeps both hex digits inside [0,len): str may be a
         * sub-string of a larger buffer (e.g. a regex match), and chars
         * following the sub-string must not be consumed as part of an escape */
        if (str[i]=='%' && len - i > 2
            && li_cton(str[i+1], n1) && li_cton(str[i+2], n2)) {
            const unsigned int x = (n1 << 4) | n2;
            if (burl_is_unreserved((int)x)) {
                p[j] = (char)x;
            }
            else { /* leave UTF-8, control chars, and required chars encoded */
                p[j] = '%';
                p[++j] = str[i+1];
                p[++j] = str[i+2];
            }
            i+=2;
        }
        else if (burl_is_unreserved(str[i]) || str[i] == '/') {
            p[j] = str[i];
        }
        else {
            p[j] = '%';
            p[++j] = hex_chars_uc[(str[i] >> 4) & 0xF];
            p[++j] = hex_chars_uc[str[i] & 0xF];
        }
    }
    buffer_commit(b, j);
}
static void burl_append_encode_all (buffer * const b, const char * const str, const size_t len)
{
    /* Appends str[0..len) to b with every char percent-encoded, except the
     * unreserved set  - . 0-9 A-Z _ a-z ~
     * (an existing '%' is encoded too, i.e. prior escapes get double-encoded)
     * Note: not checking for invalid UTF-8 */
    char * const out = buffer_string_prepare_append(b, len*3);
    int used = 0;
    for (unsigned int i = 0; i < len; ++i) {
        if (burl_is_unreserved(str[i])) {
            out[used++] = str[i];
            continue;
        }
        /* emit %XX using uppercase hex digits */
        out[used++] = '%';
        out[used++] = hex_chars_uc[(str[i] >> 4) & 0xF];
        out[used++] = hex_chars_uc[str[i] & 0xF];
    }
    buffer_commit(b, used);
}
static void burl_offset_tolower (buffer * const b, const size_t off)
{
    /* Lowercases ASCII A-Z in place, from offset off to the terminating NUL.
     * %XX sequences are stepped over untouched (even if they encode alpha) */
    char *s = b->ptr + off;
    while (*s != '\0') {
        if (*s >= 'A' && *s <= 'Z') {
            *s |= 0x20;
            ++s;
        }
        else if (*s == '%' && light_isxdigit(s[1]) && light_isxdigit(s[2])) {
            s += 3; /* '%' plus two hex digits */
        }
        else {
            ++s;
        }
    }
}
static void burl_offset_toupper (buffer * const b, const size_t off)
{
    /* Uppercases ASCII a-z in place, from offset off to the terminating NUL.
     * %XX sequences are stepped over untouched (even if they encode alpha) */
    char *s = b->ptr + off;
    while (*s != '\0') {
        if (*s >= 'a' && *s <= 'z') {
            *s &= 0xdf;
            ++s;
        }
        else if (*s == '%' && light_isxdigit(s[1]) && light_isxdigit(s[2])) {
            s += 3; /* '%' plus two hex digits */
        }
        else {
            ++s;
        }
    }
}
void burl_append (buffer * const b, const char * const str, const size_t len, const int flags)
{
    /* Appends str[0..len) to b, recoded according to flags
     * (bitwise-OR of enum burl_recoding_e values).
     *   - flags == 0: str is appended unmodified
     *   - at most one encoding is applied, checked in the order
     *     NONE, ALL, NDE, PSNDE
     *   - BURL_TOLOWER / BURL_TOUPPER are applied afterwards, and only to the
     *     newly appended portion of b (TOLOWER wins if both are set) */
    size_t off = 0;

    if (0 == len) return;

    if (0 == flags) {
        buffer_append_string_len(b, str, len);
        return;
    }

    /* remember where the appended text begins so that case conversion
     * does not touch what was already in b */
    if (flags & (BURL_TOUPPER|BURL_TOLOWER)) off = buffer_string_length(b);

    if (flags & BURL_ENCODE_NONE) {
        buffer_append_string_len(b, str, len);
    }
    else if (flags & BURL_ENCODE_ALL) {
        burl_append_encode_all(b, str, len);
    }
    else if (flags & BURL_ENCODE_NDE) {
        burl_append_encode_nde(b, str, len);
    }
    else if (flags & BURL_ENCODE_PSNDE) {
        burl_append_encode_psnde(b, str, len);
    }
    else {
        /* only case-conversion flags were given (e.g. ${tolower:url.authority});
         * append str as-is so that there is something to convert, instead of
         * silently dropping the substitution */
        buffer_append_string_len(b, str, len);
    }

    /* note: not normalizing str, which could come from arbitrary header,
     * so it is possible that alpha chars are percent-encoded upper/lowercase */
    if (flags & BURL_TOLOWER) {
        burl_offset_tolower(b, off);
    }
    else if (flags & BURL_TOUPPER) {
        burl_offset_toupper(b, off);
    }
}

12
src/burl.h

@ -7,6 +7,7 @@
/* URI components exposed to redirect/rewrite substitutions
 * (${url.scheme}, ${url.authority}, ${url.port}, ${url.path}, ${url.query}).
 * mod_redirect and mod_rewrite fill these in from the connection before
 * substituting; the buffers are pointers to the connection's own buffers. */
struct burl_parts_t {
buffer *scheme;      /* set from con->uri.scheme */
buffer *authority;   /* set from con->uri.authority */
unsigned short port; /* port of the server socket address; appended as decimal int */
buffer *path;        /* set from con->uri.path_raw */
buffer *query;       /* set from con->uri.query; also used by ${qsa} */
};
@ -29,4 +30,15 @@ enum burl_opts_e {
int burl_normalize (buffer *b, buffer *t, int flags);
/* bit flags accepted by burl_append(); may be OR'd together */
enum burl_recoding_e {
    BURL_TOLOWER      = 1 << 0, /* lowercase appended text */
    BURL_TOUPPER      = 1 << 1, /* uppercase appended text */
    BURL_ENCODE_NONE  = 1 << 2, /* append without percent-encoding */
    BURL_ENCODE_ALL   = 1 << 3, /* encode all but unreserved; double-encodes '%' */
    BURL_ENCODE_NDE   = 1 << 4, /* encode delims, but no-double-encode (NDE) */
    BURL_ENCODE_PSNDE = 1 << 5  /* similar to NDE, but preserve literal slash */
};
/* append len bytes of str to b, recoded as selected by flags
 * (bitwise-OR of enum burl_recoding_e values; 0 appends str unmodified) */
void burl_append (buffer * const b, const char * const str, const size_t len, const int flags);
#endif

175
src/keyvalue.c

@ -2,6 +2,7 @@
#include "keyvalue.h"
#include "base.h"
#include "burl.h"
#include "log.h"
#include <stdlib.h>
@ -112,6 +113,146 @@ void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) {
}
#ifdef HAVE_PCRE_H
/* Append regex capture number num (taken from the NUL-terminated strings in
 * list, which holds n entries) to b, recoded per flags.
 * Capture numbers outside the list are silently ignored. */
static void pcre_keyvalue_buffer_append_match(buffer *b, const char **list, int n, unsigned int num, int flags) {
    /* n is always > 0 */
    if (num >= (unsigned int)n) return;

    const char * const capture = list[num];
    burl_append(b, capture, strlen(capture), flags);
}
/* Append capture number num of the enclosing condition's match (%N) to b,
 * recoded per flags.  Does nothing when there is no enclosing match context
 * or when num is not below the cache's patterncount. */
static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    const struct cond_cache_t * const cache = ctx->cache;
    if (NULL == cache) return; /* no enclosing match context */
    if (num >= (unsigned int)cache->patterncount) return;

    /* matches[] stores (start, end) offset pairs into comp_value */
    const unsigned int idx = num << 1;
    const int start = cache->matches[idx];
    const int span = cache->matches[idx+1] - start;
    burl_append(b, cache->comp_value->ptr + start, (size_t)span, flags);
}
/* Expand one extended substitution, ${...} or %{...}, appending to b.
 *
 * pattern points at the leading '$' or '%' (pattern[1] is '{').
 * Recognized content, left to right:
 *   - zero or more modifiers, each terminated by ':':
 *       esc: escape: escnde: escpsnde: noesc: noescape: tolower: toupper:
 *     (unrecognized esc.../no.../to... modifiers are skipped up to ':')
 *   - then either a backreference number (one or two digits), or one of
 *       url.scheme url.authority url.port url.path url.query qsa
 * list/n provide the '$' captures; ctx provides the '%' captures and the
 * URL parts.
 *
 * Returns -1 if the substitution is malformed (missing ':' or '}');
 * otherwise returns the distance from pattern to the closing '}' minus one
 * (the caller is expected to step over the remaining chars itself). */
static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const char **list, int n, pcre_keyvalue_ctx *ctx) {
    const unsigned char *p = (const unsigned char *)pattern+2;/* +2 past ${ or %{ */
    int flags = 0;
    while (!light_isdigit(*p) && *p != '}' && *p != '\0') {
        if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') {
            p+=3;
            if (p[0] == ':') {
                flags |= BURL_ENCODE_ALL;
                p+=1;
            }
            else if (0 == strncmp((const char *)p, "ape:", 4)) {
                flags |= BURL_ENCODE_ALL;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "nde:", 4)) {
                flags |= BURL_ENCODE_NDE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "psnde:", 6)) {
                flags |= BURL_ENCODE_PSNDE;
                p+=6;
            }
            else { /* skip unrecognized esc... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'n' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "esc:", 4)) {
                flags |= BURL_ENCODE_NONE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "escape:", 7)) {
                flags |= BURL_ENCODE_NONE;
                p+=7;
            }
            else { /* skip unrecognized no... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 't' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "lower:", 6)) {
                flags |= BURL_TOLOWER;
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "upper:", 6)) {
                flags |= BURL_TOUPPER; /* (was mistakenly BURL_TOLOWER) */
                p+=6;
            }
            else { /* skip unrecognized to... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') {
            /* each branch leaves p on the closing '}' */
            p+=4;
            if (0 == strncmp((const char *)p, "scheme}", 7)) {
                burl_append(b, CONST_BUF_LEN(ctx->burl->scheme), flags);
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "authority}", 10)) {
                burl_append(b, CONST_BUF_LEN(ctx->burl->authority), flags);
                p+=9;
            }
            else if (0 == strncmp((const char *)p, "port}", 5)) {
                buffer_append_int(b, (int)ctx->burl->port);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "path}", 5)) {
                burl_append(b, CONST_BUF_LEN(ctx->burl->path), flags);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "query}", 6)) {
                burl_append(b, CONST_BUF_LEN(ctx->burl->query), flags);
                p+=5;
            }
            else { /* skip unrecognized url.* */
                p = (const unsigned char *)strchr((const char *)p, '}');
                if (NULL == p) return -1;
            }
            break;
        }
        else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') {
            /* append query string, joined with '&' if the result so far
             * already contains a '?', else with '?' */
            const buffer *qs = ctx->burl->query;
            if (!buffer_is_empty(qs)) {
                if (NULL != strchr(b->ptr, '?')) {
                    if (!buffer_string_is_empty(qs))
                        buffer_append_string_len(b, CONST_STR_LEN("&"));
                }
                else {
                    buffer_append_string_len(b, CONST_STR_LEN("?"));
                }
                burl_append(b, CONST_BUF_LEN(qs), flags);
            }
            p+=3; /* leave p on the closing '}' */
            break;
        }
        else ++p; /* skip unrecognized char */
    }

    if (*p == '\0') return -1;
    if (*p != '}') { /* light_isdigit(*p) */
        /* backreference number: one or two decimal digits */
        unsigned int num = *p - '0';
        ++p;
        if (light_isdigit(*p)) num = num * 10 + (*p++ - '0');
        if (*p != '}') {
            p = (const unsigned char *)strchr((const char *)p, '}');
            if (NULL == p) return -1;
        }
        /* default encoding for backreferences applies whenever no encoding
         * modifier was given, including when only tolower:/toupper: was
         * (otherwise a case-only modifier would select no encoding at all) */
        if (0 == (flags & (BURL_ENCODE_NONE|BURL_ENCODE_ALL
                          |BURL_ENCODE_NDE|BURL_ENCODE_PSNDE)))
            flags |= BURL_ENCODE_PSNDE; /* default */
        if (pattern[0] == '$') /*(else '%')*/
            pcre_keyvalue_buffer_append_match(b, list, n, num, flags);
        else
            pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags);
    }
    return (int)(p + 1 - (const unsigned char *)pattern - 2);
}
static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const char **list, int n, pcre_keyvalue_ctx *ctx) {
const char *pattern = patternb->ptr;
const size_t pattern_len = buffer_string_length(patternb);
@ -119,37 +260,25 @@ static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const
/* search for $... or %... pattern substitutions */
buffer_reset(b);
buffer_string_set_length(b, 0);
for (size_t k = 0; k + 1 < pattern_len; ++k) {
if (pattern[k] == '$' || pattern[k] == '%') {
size_t num = pattern[k + 1] - '0';
buffer_append_string_len(b, pattern + start, k - start);
if (!light_isdigit((unsigned char)pattern[k + 1])) {
if (pattern[k + 1] == '{') {
int num = pcre_keyvalue_buffer_subst_ext(b, pattern+k, list, n, ctx);
if (num < 0) return; /* error; truncate result */
k += (size_t)num;
} else if (light_isdigit(((unsigned char *)pattern)[k + 1])) {
unsigned int num = (unsigned int)pattern[k + 1] - '0';
pattern[k] == '$' /*(else '%')*/
? pcre_keyvalue_buffer_append_match(b, list, n, num, 0)
: pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0);
} else {
/* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */
buffer_append_string_len(b, pattern+k, pattern[k] == pattern[k+1] ? 1 : 2);
} else if (pattern[k] == '$') {
/* n is always > 0 */
if (num < (size_t)n) {
buffer_append_string(b, list[num]);
}
} else if (ctx->cache) {
const struct cond_cache_t * const cache = ctx->cache;
if (num < (size_t)cache->patterncount) {
num <<= 1; /* n *= 2 */
buffer_append_string_len(b,
cache->comp_value->ptr + cache->matches[num],
cache->matches[num + 1] - cache->matches[num]);
}
} else {
#if 0
/* we have no context, we are global */
log_error_write(srv, __FILE__, __LINE__, "ss",
"used a redirect/rewrite containing a %[0-9]+ in the global scope, ignored:",
pattern);
#endif
}
k++;

1
src/mod_redirect.c

@ -160,6 +160,7 @@ URIHANDLER_FUNC(mod_redirect_uri_handler) {
ctx.burl = &burl;
burl.scheme = con->uri.scheme;
burl.authority = con->uri.authority;
burl.port = sock_addr_get_port(&con->srv_socket->addr);
burl.path = con->uri.path_raw;
burl.query = con->uri.query;

1
src/mod_rewrite.c

@ -253,6 +253,7 @@ static handler_t process_rewrite_rules(server *srv, connection *con, plugin_data
ctx.burl = &burl;
burl.scheme = con->uri.scheme;
burl.authority = con->uri.authority;
burl.port = sock_addr_get_port(&con->srv_socket->addr);
burl.path = con->uri.path_raw;
burl.query = con->uri.query;

Loading…
Cancel
Save