From 72011fbedef2368d2c45e0298f9021b9d89eabeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Fri, 6 Jun 2014 13:41:30 +0200 Subject: [PATCH] [mod_rewrite,mod_proxy,docs] fix request.raw_path handling (includes query-string) --- doc/core_config.xml | 2 +- doc/mod_proxy.xml | 2 +- doc/mod_rewrite.xml | 4 +- include/lighttpd/url_parser.h | 7 +++ src/main/url_parser.rl | 26 +++++++++ src/modules/mod_proxy.c | 5 -- src/modules/mod_rewrite.c | 103 ++++++++++++++++++++++------------ tests/t-mod-proxy.py | 12 ++-- tests/t-rewrite.py | 11 ++++ 9 files changed, 121 insertions(+), 51 deletions(-) diff --git a/doc/core_config.xml b/doc/core_config.xml index bec7856..cd9c959 100644 --- a/doc/core_config.xml +++ b/doc/core_config.xml @@ -299,7 +299,7 @@ | request.remoteip | ip address of the client | | request.remoteport | port number of the client, -1 for unix sockets | | request.path | the _path_ part of the requested url. not including the querystring. | - | request.raw_path | the raw _path_ part (not urldecoded, not simplified) of the requested url. not including the querystring. | + | request.raw_path | the raw _path_ (not urldecoded, not simplified) of the requested url, including the querystring. | | request.host | requested hostname | | request.scheme | scheme of the request. "http" or "https" | | request.query | the _querystring_ of the requested url | diff --git a/doc/mod_proxy.xml b/doc/mod_proxy.xml index 2f2f2ea..b0339fb 100644 --- a/doc/mod_proxy.xml +++ b/doc/mod_proxy.xml @@ -9,7 +9,7 @@ diff --git a/doc/mod_rewrite.xml b/doc/mod_rewrite.xml index 02db6a0..eaf6e4c 100644 --- a/doc/mod_rewrite.xml +++ b/doc/mod_rewrite.xml @@ -84,7 +84,9 @@ Similar to "@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite, but matches the raw path (i.e. the path before URL decoding and sanitizing) and the result is decoded again. 
- "@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite write the result to @request.path@ and uses URL encoding to generate @request.raw_path@; "@rewrite_raw@":mod_rewrite.html#mod_rewrite__action_rewrite_raw writes @request.raw_path@ and decodes it into @request.path@. In both cases @request.path@ gets simplified afterwards. + "@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite writes the result to @request.path@ and possibly @request.query@ and uses URL encoding to generate @request.raw_path@ from those. + "@rewrite_raw@":mod_rewrite.html#mod_rewrite__action_rewrite_raw writes @request.raw_path@ and decodes it into @request.path@ and @request.query@; this means the query string is always overwritten. + In both cases @request.path@ gets simplified afterwards. diff --git a/include/lighttpd/url_parser.h b/include/lighttpd/url_parser.h index 667c0c5..370b3d8 100644 --- a/include/lighttpd/url_parser.h +++ b/include/lighttpd/url_parser.h @@ -3,7 +3,14 @@ #include +/* parses uri->raw into all components, which have to be reset/initialized before */ LI_API gboolean li_parse_raw_url(liRequestUri *uri); + +/* parse input into uri->path, uri->raw_path and uri->query, which get truncated before. 
+ * also decodes and simplifies path on success + */ +LI_API gboolean li_parse_raw_path(liRequestUri *uri, GString *input); + LI_API gboolean li_parse_hostname(liRequestUri *uri); #endif diff --git a/src/main/url_parser.rl b/src/main/url_parser.rl index 81e3318..320cca2 100644 --- a/src/main/url_parser.rl +++ b/src/main/url_parser.rl @@ -77,6 +77,7 @@ URI = (scheme >mark %save_scheme) "://" (authority >mark %save_authority) URI_path; parse_URI := URI | ("*" >mark %save_path) | URI_path; + parse_URI_path := URI_path; parse_Hostname := (host >mark_host %save_host) ( ":" port )?; write data; @@ -98,6 +99,31 @@ gboolean li_parse_raw_url(liRequestUri *uri) { return (cs >= url_parser_first_final); } +gboolean li_parse_raw_path(liRequestUri *uri, GString *input) { + const char *p, *pe, *eof; + const char *mark = NULL, *host_mark = NULL; + int cs; + + p = input->str; + eof = pe = input->str + input->len; + + g_string_truncate(uri->path, 0); + g_string_truncate(uri->raw_path, 0); + g_string_truncate(uri->query, 0); + + %% write init nocs; + cs = url_parser_en_parse_URI_path; + + %% write exec; + + if (cs >= url_parser_first_final) { + li_url_decode(uri->path); + li_path_simplify(uri->path); + } + + return (cs >= url_parser_first_final); +} + gboolean li_parse_hostname(liRequestUri *uri) { const char *p, *pe, *eof; const char *mark = NULL, *host_mark = NULL; diff --git a/src/modules/mod_proxy.c b/src/modules/mod_proxy.c index 9a568df..30a24cd 100644 --- a/src/modules/mod_proxy.c +++ b/src/modules/mod_proxy.c @@ -50,11 +50,6 @@ static void proxy_send_headers(liVRequest *vr, liChunkQueue *out) { g_string_append_len(head, GSTR_LEN(vr->request.uri.raw_path)); - if (vr->request.uri.query->len > 0) { - g_string_append_len(head, CONST_STR_LEN("?")); - g_string_append_len(head, GSTR_LEN(vr->request.uri.query)); - } - switch (vr->request.http_version) { case LI_HTTP_VERSION_1_1: /* g_string_append_len(head, CONST_STR_LEN(" HTTP/1.1\r\n")); */ diff --git a/src/modules/mod_rewrite.c 
b/src/modules/mod_rewrite.c index 44af2f1..be83a9f 100644 --- a/src/modules/mod_rewrite.c +++ b/src/modules/mod_rewrite.c @@ -11,6 +11,7 @@ #include #include #include +#include LI_API gboolean mod_rewrite_init(liModules *mods, liModule *mod); LI_API gboolean mod_rewrite_free(liModules *mods, liModule *mod); @@ -25,23 +26,24 @@ typedef struct rewrite_data rewrite_data; struct rewrite_data { GArray *rules; liPlugin *p; - gboolean raw; }; -static gboolean rewrite_rule_parse(liServer *srv, GString *regex, GString *str, rewrite_rule *rule) { - gchar *qs; +static gboolean rewrite_rule_parse(liServer *srv, GString *regex, GString *str, rewrite_rule *rule, gboolean raw) { + gchar *qs = NULL; rule->path = rule->querystring = NULL; rule->regex = NULL; - /* find "not-escaped" ? */ - for (qs = str->str; *qs; qs++) { - if ('\\' == *qs) { - qs++; - if (!*qs) break; - } else if ('?' == *qs) break; + if (!raw) { + /* find "not-escaped" ? */ + for (qs = str->str; *qs; qs++) { + if ('\\' == *qs) { + qs++; + if (!*qs) break; + } else if ('?' 
== *qs) break; + } + if (!*qs) qs = NULL; } - if (!*qs) qs = NULL; if (NULL != qs) { *qs = '\0'; /* restore later */ @@ -112,10 +114,11 @@ static gboolean rewrite_internal(liVRequest *vr, GString *dest_path, GString *de } g_string_truncate(dest_path, 0); - g_string_truncate(dest_query, 0); + if (NULL != dest_query) g_string_truncate(dest_query, 0); li_pattern_eval(vr, dest_path, rule->path, li_pattern_regex_cb, match_info, li_pattern_regex_cb, prev_match_info); if (NULL != rule->querystring) { + LI_FORCE_ASSERT(NULL != dest_query); li_pattern_eval(vr, dest_query, rule->querystring, li_pattern_regex_cb, match_info, li_pattern_regex_cb, prev_match_info); } @@ -124,13 +127,44 @@ static gboolean rewrite_internal(liVRequest *vr, GString *dest_path, GString *de return TRUE; } +static liHandlerResult rewrite_raw(liVRequest *vr, gpointer param, gpointer *context) { + guint i; + rewrite_rule *rule; + rewrite_data *rd = param; + gboolean debug = _OPTION(vr, rd->p, 0).boolean; + gchar *path = vr->request.uri.raw_path->str; + UNUSED(context); + + for (i = 0; i < rd->rules->len; i++) { + GString *dest_path = vr->wrk->tmp_str; + + rule = &g_array_index(rd->rules, rewrite_rule, i); + + if (rewrite_internal(vr, dest_path, NULL, rule, path)) { + /* regex matched */ + if (debug) { + VR_DEBUG(vr, "rewrite_raw: path \"%s\" => \"%s\"", path, dest_path->str); + } + + if (!li_parse_raw_path(&vr->request.uri, dest_path)) return LI_HANDLER_ERROR; + + /* stop at first matching regex */ + break; + } + } + + return LI_HANDLER_GO_ON; +} + + + static liHandlerResult rewrite(liVRequest *vr, gpointer param, gpointer *context) { guint i; rewrite_rule *rule; rewrite_data *rd = param; gboolean debug = _OPTION(vr, rd->p, 0).boolean; GString *dest_query = g_string_sized_new(31); - gchar *path = rd->raw ? 
vr->request.uri.raw_path->str : vr->request.uri.path->str; + gchar *path = vr->request.uri.path->str; UNUSED(context); for (i = 0; i < rd->rules->len; i++) { @@ -142,45 +176,40 @@ static liHandlerResult rewrite(liVRequest *vr, gpointer param, gpointer *context /* regex matched */ if (debug) { if (NULL != rule->querystring) { - VR_DEBUG(vr, "rewrite%s: path \"%s\" => \"%s\", query \"%s\" => \"%s\"", - rd->raw ? " (raw)" : "", + VR_DEBUG(vr, "rewrite: path \"%s\" => \"%s\", query \"%s\" => \"%s\"", path, dest_path->str, vr->request.uri.query->str, dest_query->str ); } else { - VR_DEBUG(vr, "rewrite%s: path \"%s\" => \"%s\"", - rd->raw ? " (raw)" : "", + VR_DEBUG(vr, "rewrite: path \"%s\" => \"%s\"", path, dest_path->str ); } } - /* change request path */ - if (rd->raw) { - g_string_truncate(vr->request.uri.raw_path, 0); - g_string_append_len(vr->request.uri.raw_path, GSTR_LEN(dest_path)); - g_string_truncate(vr->request.uri.path, 0); - g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path)); - li_url_decode(vr->request.uri.path); - } else { - g_string_truncate(vr->request.uri.path, 0); - g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path)); - li_string_encode(vr->request.uri.path->str, vr->request.uri.raw_path, LI_ENCODING_URI); - } - li_path_simplify(vr->request.uri.path); - /* change request query */ if (NULL != rule->querystring) { g_string_truncate(vr->request.uri.query, 0); g_string_append_len(vr->request.uri.query, GSTR_LEN(dest_query)); } + /* change request path */ + g_string_truncate(vr->request.uri.path, 0); + g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path)); + li_path_simplify(vr->request.uri.path); + + /* rebuild raw_path */ + li_string_encode(vr->request.uri.path->str, vr->request.uri.raw_path, LI_ENCODING_URI); + if (vr->request.uri.query->len > 0) { + g_string_append_len(vr->request.uri.raw_path, CONST_STR_LEN("?")); + g_string_append_len(vr->request.uri.raw_path, GSTR_LEN(vr->request.uri.query)); + } + /* stop at first 
matching regex */ - goto out; + break; } } -out: g_string_free(dest_query, TRUE); return LI_HANDLER_GO_ON; } @@ -208,6 +237,7 @@ static void rewrite_free(liServer *srv, gpointer param) { static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liValue *val, gpointer userdata) { rewrite_data *rd; + gboolean raw = GPOINTER_TO_INT(userdata); UNUSED(wrk); val = li_value_get_single_argument(val); @@ -220,13 +250,12 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal rd = g_slice_new(rewrite_data); rd->p = p; rd->rules = g_array_new(FALSE, FALSE, sizeof(rewrite_rule)); - rd->raw = GPOINTER_TO_INT(userdata); if (LI_VALUE_STRING == li_value_type(val)) { /* rewrite "/foo/bar"; */ rewrite_rule rule = { NULL, NULL, NULL }; - if (!rewrite_rule_parse(srv, NULL, val->data.string, &rule)) { + if (!rewrite_rule_parse(srv, NULL, val->data.string, &rule, raw)) { rewrite_free(NULL, rd); ERROR(srv, "rewrite: error parsing rule \"%s\"", val->data.string->str); return NULL; @@ -237,7 +266,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal /* only one rule */ rewrite_rule rule = { NULL, NULL, NULL }; - if (!rewrite_rule_parse(srv, li_value_list_at(val, 0)->data.string, li_value_list_at(val, 1)->data.string, &rule)) { + if (!rewrite_rule_parse(srv, li_value_list_at(val, 0)->data.string, li_value_list_at(val, 1)->data.string, &rule, raw)) { rewrite_free(NULL, rd); return NULL; } @@ -255,7 +284,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal return NULL; } - if (!rewrite_rule_parse(srv, li_value_list_at(v, 0)->data.string, li_value_list_at(v, 1)->data.string, &rule)) { + if (!rewrite_rule_parse(srv, li_value_list_at(v, 0)->data.string, li_value_list_at(v, 1)->data.string, &rule, raw)) { rewrite_free(NULL, rd); return NULL; } @@ -264,7 +293,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal LI_VALUE_END_FOREACH() } - return 
li_action_new_function(rewrite, NULL, rewrite_free, rd); + return li_action_new_function(raw ? rewrite_raw : rewrite, NULL, rewrite_free, rd); } diff --git a/tests/t-mod-proxy.py b/tests/t-mod-proxy.py index 03c53ec..df6bdb4 100644 --- a/tests/t-mod-proxy.py +++ b/tests/t-mod-proxy.py @@ -14,8 +14,8 @@ self_proxy; # need vhost for next test class TestEncodedURL(CurlRequest): - URL = "/some%2Ffile" - EXPECT_RESPONSE_BODY = "/dest%2Ffile" + URL = "/some%2Ffile?abc" + EXPECT_RESPONSE_BODY = "/dest%2Ffile?abc" EXPECT_RESPONSE_CODE = 200 no_docroot = True config = """ @@ -25,8 +25,8 @@ respond 200 => "%{req.raw_path}"; # backend gets encoded %2F and rewrites again class TestProxiedRewrittenEncodedURL(CurlRequest): - URL = "/foo%2Ffile" - EXPECT_RESPONSE_BODY = "/dest%2Ffile" + URL = "/foo%2Ffile?abc" + EXPECT_RESPONSE_BODY = "/dest%2Ffile?abc" EXPECT_RESPONSE_CODE = 200 no_docroot = True config = """ @@ -37,8 +37,8 @@ self_proxy; # backend gets decoded %2F and doesn't rewrite again class TestProxiedRewrittenDecodedURL(CurlRequest): - URL = "/foo%2Ffile" - EXPECT_RESPONSE_BODY = "/some/file" + URL = "/foo%2Ffile?abc" + EXPECT_RESPONSE_BODY = "/some/file?abc" EXPECT_RESPONSE_CODE = 200 no_docroot = True config = """ diff --git a/tests/t-rewrite.py b/tests/t-rewrite.py index 6d80ab3..f34f902 100644 --- a/tests/t-rewrite.py +++ b/tests/t-rewrite.py @@ -53,6 +53,16 @@ rewrite_raw "(/http://some%2F.*)" => "/dest$1"; respond 200 => "%{req.raw_path}"; """ +# raw match and write query string +class TestRewrite6(CurlRequest): + URL = "/http://some%2Ffile" + EXPECT_RESPONSE_BODY = "/http://some%2Ffile" + EXPECT_RESPONSE_CODE = 200 + config = """ +rewrite_raw "(/http://some%2F.*)" => "/dest?$1"; +respond 200 => "%{req.query}"; +""" + class Test(GroupTest): plain_config = """ setup { module_load "mod_rewrite"; } @@ -64,4 +74,5 @@ setup { module_load "mod_rewrite"; } TestRewrite3, TestRewrite4, TestRewrite5, + TestRewrite6, ]