[core] "url-invalid-utf8-reject" normalization opt

server.http-parseopts "url-invalid-utf8-reject" url normalization option
default: "url-invalid-utf8-reject" => "enable"
master
Glenn Strauss 7 months ago
parent 467fb2bfb7
commit a01e62bb7d
  1. 34
      src/burl.c
  2. 1
      src/burl.h
  3. 8
      src/configfile.c
  4. 9
      src/t/test_burl.c

@ -66,8 +66,8 @@ static int burl_normalize_basic_unreserved_fix (buffer *b, buffer *t, int i, int
memcpy(p, s, (size_t)i);
for (; i < used; ++i, ++j) {
if (!encoded_chars_http_uri_reqd[s[i]]) {
if (s[i] == '?' && -1 == qs) qs = j;
p[j] = s[i];
if (__builtin_expect( (s[i] == '?'), 0) && -1 == qs) qs = j;
}
else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) {
const unsigned int x = (n1 << 4) | n2;
@ -104,7 +104,7 @@ static int burl_normalize_basic_unreserved (buffer *b, buffer *t)
for (int i = 0; i < used; ++i) {
if (!encoded_chars_http_uri_reqd[s[i]]) {
if (s[i] == '?' && -1 == qs) qs = i;
if (__builtin_expect( (s[i] == '?'), 0) && -1 == qs) qs = i;
}
else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)
&& !burl_is_unreserved((x = (n1 << 4) | n2))) {
@ -135,11 +135,12 @@ static int burl_normalize_basic_required_fix (buffer *b, buffer *t, int i, int q
unsigned char * const p =
(unsigned char *)buffer_string_prepare_copy(t,i+(used-i)*3+1);
unsigned int n1, n2;
int invalid_utf8 = 0;
memcpy(p, s, (size_t)i);
for (; i < used; ++i, ++j) {
if (!encoded_chars_http_uri_reqd[s[i]]) {
if (s[i] == '?' && -1 == qs) qs = j;
p[j] = s[i];
if (__builtin_expect( (s[i] == '?'), 0)) qs = j;
}
else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)) {
const unsigned int x = (n1 << 4) | n2;
@ -153,7 +154,7 @@ static int burl_normalize_basic_required_fix (buffer *b, buffer *t, int i, int q
p[j] = '%';
p[++j] = hex_chars_uc[n1]; /*(s[i+1] & 0xdf)*/
p[++j] = hex_chars_uc[n2]; /*(s[i+2] & 0xdf)*/
if (li_utf8_invalid_byte(x)) qs = -2;
invalid_utf8 |= li_utf8_invalid_byte(x);
}
i+=2;
}
@ -162,11 +163,11 @@ static int burl_normalize_basic_required_fix (buffer *b, buffer *t, int i, int q
p[j] = '%';
p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF];
p[++j] = hex_chars_uc[s[i] & 0xF];
if (li_utf8_invalid_byte(s[i])) qs = -2;
invalid_utf8 |= li_utf8_invalid_byte(s[i]);
}
}
buffer_copy_string_len(b, (char *)p, (size_t)j);
return qs;
return !invalid_utf8 ? qs : -2;
}
@ -176,17 +177,18 @@ static int burl_normalize_basic_required (buffer *b, buffer *t)
const int used = (int)buffer_clen(b);
unsigned int n1, n2, x;
int qs = -1;
int invalid_utf8 = 0;
for (int i = 0; i < used; ++i) {
if (!encoded_chars_http_uri_reqd[s[i]]) {
if (s[i] == '?' && -1 == qs) qs = i;
if (s[i] == '?') qs = i;
}
else if (s[i]=='%' && li_cton(s[i+1], n1) && li_cton(s[i+2], n2)
&& (encoded_chars_http_uri_reqd[(x = (n1 << 4) | n2)]
|| (qs < 0
? (x == '/' || x == '?')
: (x == '&' || x == '=' || x == ';' || x == '+')))) {
if (li_utf8_invalid_byte(x)) qs = -2;
invalid_utf8 |= li_utf8_invalid_byte(x);
if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */
if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */
i+=2;
@ -201,7 +203,7 @@ static int burl_normalize_basic_required (buffer *b, buffer *t)
}
}
return qs;
return !invalid_utf8 ? qs : -2;
}
@ -323,6 +325,15 @@ static int burl_normalize_path (buffer *b, buffer *t, int qs, int flags)
}
/* Find the first '?' in the NUL-terminated string stored at b->ptr.
 * Returns the byte offset of that '?' relative to b->ptr,
 * or -1 if the string contains no '?'. */
__attribute_cold__
__attribute_noinline__
__attribute_pure__
static int burl_scan_qmark (const buffer * const b) {
    const char * const start = b->ptr;
    const char * const hit = strchr(start, '?');
    if (!hit)
        return -1;
    return (int)(hit - start);
}
int burl_normalize (buffer *b, buffer *t, int flags)
{
int qs;
@ -342,7 +353,10 @@ int burl_normalize (buffer *b, buffer *t, int flags)
qs = (flags & HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED)
? burl_normalize_basic_required(b, t)
: burl_normalize_basic_unreserved(b, t);
if (-2 == qs) return -2;
if (-2 == qs) {
if (flags & HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT) return -2;
qs = burl_scan_qmark(b);
}
if (flags & HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT) {
if (burl_contains_ctrls(b)) return -2;

@ -26,6 +26,7 @@ enum burl_opts_e {
,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE =0x400/* "." ".." "//" */
,HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT =0x800
,HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS =0x1000
,HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT =0x2000
,HTTP_PARSEOPT_METHOD_GET_BODY =0x8000
};

@ -574,6 +574,8 @@ static int config_http_parseopts (server *srv, const array *a) {
opt = HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REJECT;
else if (buffer_eq_slen(k, CONST_STR_LEN("url-query-20-plus")))
opt = HTTP_PARSEOPT_URL_NORMALIZE_QUERY_20_PLUS;
else if (buffer_eq_slen(k, CONST_STR_LEN("url-invalid-utf8-reject")))
opt = HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT;
else if (buffer_eq_slen(k, CONST_STR_LEN("header-strict"))) {
srv->srvconf.http_header_strict = val;
continue;
@ -631,7 +633,8 @@ static int config_http_parseopts (server *srv, const array *a) {
}
if (!(opts & (HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED
|HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED))) {
opts |= HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED;
opts |= HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED
| HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT;
if (decode_2f
&& !(opts & HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_REJECT))
opts |= HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE;
@ -1521,7 +1524,8 @@ void config_init(server *srv) {
| HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED
| HTTP_PARSEOPT_URL_NORMALIZE_CTRLS_REJECT
| HTTP_PARSEOPT_URL_NORMALIZE_PATH_2F_DECODE
| HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE;
| HTTP_PARSEOPT_URL_NORMALIZE_PATH_DOTSEG_REMOVE
| HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT;
srv->srvconf.modules = array_init(16);
srv->srvconf.modules_dir = LIBRARY_DIR;

@ -31,6 +31,11 @@ static void test_burl_normalize (void) {
int flags;
flags = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED;
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%C0"), CONST_STR_LEN("/%C0"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), CONST_STR_LEN("/%FF"));
flags = HTTP_PARSEOPT_URL_NORMALIZE_UNRESERVED
| HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT;
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("no-slash"), CONST_STR_LEN("no-slash"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc"));
@ -53,11 +58,13 @@ static void test_burl_normalize (void) {
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3a"), CONST_STR_LEN("/%3A"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%3A"), CONST_STR_LEN("/%3A"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/~test%20ä_"), CONST_STR_LEN("/~test%20%C3%A4_"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/%C0"), "", (size_t)-2);
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\375"), "", (size_t)-2);
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\376"), "", (size_t)-2);
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/\377"), "", (size_t)-2);
flags = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED;
flags = HTTP_PARSEOPT_URL_NORMALIZE_REQUIRED
| HTTP_PARSEOPT_URL_NORMALIZE_INVALID_UTF8_REJECT;
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/"), CONST_STR_LEN("/"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc"), CONST_STR_LEN("/abc"));
run_burl_normalize(psrc, ptmp, flags, __LINE__, CONST_STR_LEN("/abc/"), CONST_STR_LEN("/abc/"));

Loading…
Cancel
Save