Browse Source

[mod_rewrite,mod_proxy,docs] fix request.raw_path handling (includes query-string)

master
Stefan Bühler 7 years ago
parent
commit
72011fbede
  1. 2
      doc/core_config.xml
  2. 2
      doc/mod_proxy.xml
  3. 4
      doc/mod_rewrite.xml
  4. 7
      include/lighttpd/url_parser.h
  5. 26
      src/main/url_parser.rl
  6. 5
      src/modules/mod_proxy.c
  7. 103
      src/modules/mod_rewrite.c
  8. 12
      tests/t-mod-proxy.py
  9. 11
      tests/t-rewrite.py

2
doc/core_config.xml

@ -299,7 +299,7 @@
| request.remoteip | ip address of the client |
| request.remoteport | port number of the client, -1 for unix sockets |
| request.path | the _path_ part of the requested url. not including the querystring. |
| request.raw_path | the raw _path_ part (not urldecoded, not simplified) of the requested url. not including the querystring. |
| request.raw_path | the raw _path_ (not urldecoded, not simplified) of the requested url, including the querystring. |
| request.host | requested hostname |
| request.scheme | scheme of the request. "http" or "https" |
| request.query | the _querystring_ of the requested url |

2
doc/mod_proxy.xml

@ -9,7 +9,7 @@
</parameter>
<description>
<textile><![CDATA[
proxy combines @request.raw_path@ and @request.query@ for the URL to send to the backend.
proxy uses @request.raw_path@ for the URL (including the query string) to send to the backend.
]]></textile>
</description>
<example>

4
doc/mod_rewrite.xml

@ -84,7 +84,9 @@
<textile>
Similar to "@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite, but matches the raw path (i.e. the path before URL decoding and sanitizing) and the result is decoded again.
"@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite write the result to @request.path@ and uses URL encoding to generate @request.raw_path@; "@rewrite_raw@":mod_rewrite.html#mod_rewrite__action_rewrite_raw writes @request.raw_path@ and decodes it into @request.path@. In both cases @request.path@ gets simplified afterwards.
"@rewrite@":mod_rewrite.html#mod_rewrite__action_rewrite writes the result to @request.path@ and possibly @request.query@ and uses URL encoding to generate @request.raw_path@ from those.
"@rewrite_raw@":mod_rewrite.html#mod_rewrite__action_rewrite_raw writes @request.raw_path@ and decodes it into @request.path@ and @request.query@; this means the query string is always overwritten.
In both cases @request.path@ gets simplified afterwards.
</textile>
</description>
</action>

7
include/lighttpd/url_parser.h

@ -3,7 +3,14 @@
#include <lighttpd/base.h>
/* parses uri->raw into all components, which have to be reset/initialized before */
LI_API gboolean li_parse_raw_url(liRequestUri *uri);
/* parse input into uri->path, uri->raw_path and uri->query, which get truncated before.
* also decodes and simplifies path on success
*/
LI_API gboolean li_parse_raw_path(liRequestUri *uri, GString *input);
LI_API gboolean li_parse_hostname(liRequestUri *uri);
#endif

26
src/main/url_parser.rl

@ -77,6 +77,7 @@
URI = (scheme >mark %save_scheme) "://" (authority >mark %save_authority) URI_path;
parse_URI := URI | ("*" >mark %save_path) | URI_path;
parse_URI_path := URI_path;
parse_Hostname := (host >mark_host %save_host) ( ":" port )?;
write data;
@ -98,6 +99,31 @@ gboolean li_parse_raw_url(liRequestUri *uri) {
return (cs >= url_parser_first_final);
}
/* Run the url parser state machine over `input`, starting at the
 * parse_URI_path entry point, to refill uri->path, uri->raw_path and uri->query.
 *
 * All three strings are truncated before parsing, so on failure they do not
 * keep their previous values. On success uri->path is additionally url-decoded
 * and simplified.
 *
 * Returns TRUE iff the state machine ended in a final (accepting) state.
 */
gboolean li_parse_raw_path(liRequestUri *uri, GString *input) {
/* p / pe / eof / cs are the variables the generated Ragel code operates on */
const char *p, *pe, *eof;
/* NOTE(review): presumably consumed by the mark / mark_host actions of the machine, which are defined outside this function */
const char *mark = NULL, *host_mark = NULL;
int cs;
p = input->str;
/* the whole input is available at once, so end-of-buffer is also end-of-file */
eof = pe = input->str + input->len;
/* reset the output fields before the machine runs */
g_string_truncate(uri->path, 0);
g_string_truncate(uri->raw_path, 0);
g_string_truncate(uri->query, 0);
/* "nocs": the generated init code must not set cs; the start state is selected manually on the next line */
%% write init nocs;
cs = url_parser_en_parse_URI_path;
%% write exec;
/* only post-process the path if the input was accepted */
if (cs >= url_parser_first_final) {
li_url_decode(uri->path);
li_path_simplify(uri->path);
}
return (cs >= url_parser_first_final);
}
gboolean li_parse_hostname(liRequestUri *uri) {
const char *p, *pe, *eof;
const char *mark = NULL, *host_mark = NULL;

5
src/modules/mod_proxy.c

@ -50,11 +50,6 @@ static void proxy_send_headers(liVRequest *vr, liChunkQueue *out) {
g_string_append_len(head, GSTR_LEN(vr->request.uri.raw_path));
if (vr->request.uri.query->len > 0) {
g_string_append_len(head, CONST_STR_LEN("?"));
g_string_append_len(head, GSTR_LEN(vr->request.uri.query));
}
switch (vr->request.http_version) {
case LI_HTTP_VERSION_1_1:
/* g_string_append_len(head, CONST_STR_LEN(" HTTP/1.1\r\n")); */

103
src/modules/mod_rewrite.c

@ -11,6 +11,7 @@
#include <lighttpd/base.h>
#include <lighttpd/encoding.h>
#include <lighttpd/pattern.h>
#include <lighttpd/url_parser.h>
LI_API gboolean mod_rewrite_init(liModules *mods, liModule *mod);
LI_API gboolean mod_rewrite_free(liModules *mods, liModule *mod);
@ -25,23 +26,24 @@ typedef struct rewrite_data rewrite_data;
struct rewrite_data {
GArray *rules;
liPlugin *p;
gboolean raw;
};
static gboolean rewrite_rule_parse(liServer *srv, GString *regex, GString *str, rewrite_rule *rule) {
gchar *qs;
static gboolean rewrite_rule_parse(liServer *srv, GString *regex, GString *str, rewrite_rule *rule, gboolean raw) {
gchar *qs = NULL;
rule->path = rule->querystring = NULL;
rule->regex = NULL;
/* find "not-escaped" ? */
for (qs = str->str; *qs; qs++) {
if ('\\' == *qs) {
qs++;
if (!*qs) break;
} else if ('?' == *qs) break;
if (!raw) {
/* find "not-escaped" ? */
for (qs = str->str; *qs; qs++) {
if ('\\' == *qs) {
qs++;
if (!*qs) break;
} else if ('?' == *qs) break;
}
if (!*qs) qs = NULL;
}
if (!*qs) qs = NULL;
if (NULL != qs) {
*qs = '\0'; /* restore later */
@ -112,10 +114,11 @@ static gboolean rewrite_internal(liVRequest *vr, GString *dest_path, GString *de
}
g_string_truncate(dest_path, 0);
g_string_truncate(dest_query, 0);
if (NULL != dest_query) g_string_truncate(dest_query, 0);
li_pattern_eval(vr, dest_path, rule->path, li_pattern_regex_cb, match_info, li_pattern_regex_cb, prev_match_info);
if (NULL != rule->querystring) {
LI_FORCE_ASSERT(NULL != dest_query);
li_pattern_eval(vr, dest_query, rule->querystring, li_pattern_regex_cb, match_info, li_pattern_regex_cb, prev_match_info);
}
@ -124,13 +127,44 @@ static gboolean rewrite_internal(liVRequest *vr, GString *dest_path, GString *de
return TRUE;
}
/* Action handler for "rewrite_raw".
 *
 * Tries each configured rule in order against the raw request path. For the
 * first rule that matches, the evaluated result is handed to
 * li_parse_raw_path(), which refills the request uri from it; no further
 * rules are tried after that.
 *
 * Returns LI_HANDLER_ERROR if the rewritten result cannot be parsed,
 * LI_HANDLER_GO_ON otherwise (including when no rule matched).
 */
static liHandlerResult rewrite_raw(liVRequest *vr, gpointer param, gpointer *context) {
	rewrite_data *data = param;
	const gboolean log_matches = _OPTION(vr, data->p, 0).boolean;
	gchar *raw = vr->request.uri.raw_path->str;
	guint idx = 0;

	UNUSED(context);

	while (idx < data->rules->len) {
		rewrite_rule *candidate = &g_array_index(data->rules, rewrite_rule, idx);
		/* worker-level scratch string receives the rewritten path */
		GString *target = vr->wrk->tmp_str;

		idx++;

		/* no query destination is passed for raw rewrites */
		if (!rewrite_internal(vr, target, NULL, candidate, raw)) {
			continue;
		}

		/* regex matched */
		if (log_matches) {
			VR_DEBUG(vr, "rewrite_raw: path \"%s\" => \"%s\"", raw, target->str);
		}

		/* first matching rule wins: its parse result decides the outcome */
		if (li_parse_raw_path(&vr->request.uri, target)) {
			return LI_HANDLER_GO_ON;
		}
		return LI_HANDLER_ERROR;
	}

	return LI_HANDLER_GO_ON;
}
static liHandlerResult rewrite(liVRequest *vr, gpointer param, gpointer *context) {
guint i;
rewrite_rule *rule;
rewrite_data *rd = param;
gboolean debug = _OPTION(vr, rd->p, 0).boolean;
GString *dest_query = g_string_sized_new(31);
gchar *path = rd->raw ? vr->request.uri.raw_path->str : vr->request.uri.path->str;
gchar *path = vr->request.uri.path->str;
UNUSED(context);
for (i = 0; i < rd->rules->len; i++) {
@ -142,45 +176,40 @@ static liHandlerResult rewrite(liVRequest *vr, gpointer param, gpointer *context
/* regex matched */
if (debug) {
if (NULL != rule->querystring) {
VR_DEBUG(vr, "rewrite%s: path \"%s\" => \"%s\", query \"%s\" => \"%s\"",
rd->raw ? " (raw)" : "",
VR_DEBUG(vr, "rewrite: path \"%s\" => \"%s\", query \"%s\" => \"%s\"",
path, dest_path->str,
vr->request.uri.query->str, dest_query->str
);
} else {
VR_DEBUG(vr, "rewrite%s: path \"%s\" => \"%s\"",
rd->raw ? " (raw)" : "",
VR_DEBUG(vr, "rewrite: path \"%s\" => \"%s\"",
path, dest_path->str
);
}
}
/* change request path */
if (rd->raw) {
g_string_truncate(vr->request.uri.raw_path, 0);
g_string_append_len(vr->request.uri.raw_path, GSTR_LEN(dest_path));
g_string_truncate(vr->request.uri.path, 0);
g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path));
li_url_decode(vr->request.uri.path);
} else {
g_string_truncate(vr->request.uri.path, 0);
g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path));
li_string_encode(vr->request.uri.path->str, vr->request.uri.raw_path, LI_ENCODING_URI);
}
li_path_simplify(vr->request.uri.path);
/* change request query */
if (NULL != rule->querystring) {
g_string_truncate(vr->request.uri.query, 0);
g_string_append_len(vr->request.uri.query, GSTR_LEN(dest_query));
}
/* change request path */
g_string_truncate(vr->request.uri.path, 0);
g_string_append_len(vr->request.uri.path, GSTR_LEN(dest_path));
li_path_simplify(vr->request.uri.path);
/* rebuild raw_path */
li_string_encode(vr->request.uri.path->str, vr->request.uri.raw_path, LI_ENCODING_URI);
if (vr->request.uri.query->len > 0) {
g_string_append_len(vr->request.uri.raw_path, CONST_STR_LEN("?"));
g_string_append_len(vr->request.uri.raw_path, GSTR_LEN(vr->request.uri.query));
}
/* stop at first matching regex */
goto out;
break;
}
}
out:
g_string_free(dest_query, TRUE);
return LI_HANDLER_GO_ON;
}
@ -208,6 +237,7 @@ static void rewrite_free(liServer *srv, gpointer param) {
static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liValue *val, gpointer userdata) {
rewrite_data *rd;
gboolean raw = GPOINTER_TO_INT(userdata);
UNUSED(wrk);
val = li_value_get_single_argument(val);
@ -220,13 +250,12 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal
rd = g_slice_new(rewrite_data);
rd->p = p;
rd->rules = g_array_new(FALSE, FALSE, sizeof(rewrite_rule));
rd->raw = GPOINTER_TO_INT(userdata);
if (LI_VALUE_STRING == li_value_type(val)) {
/* rewrite "/foo/bar"; */
rewrite_rule rule = { NULL, NULL, NULL };
if (!rewrite_rule_parse(srv, NULL, val->data.string, &rule)) {
if (!rewrite_rule_parse(srv, NULL, val->data.string, &rule, raw)) {
rewrite_free(NULL, rd);
ERROR(srv, "rewrite: error parsing rule \"%s\"", val->data.string->str);
return NULL;
@ -237,7 +266,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal
/* only one rule */
rewrite_rule rule = { NULL, NULL, NULL };
if (!rewrite_rule_parse(srv, li_value_list_at(val, 0)->data.string, li_value_list_at(val, 1)->data.string, &rule)) {
if (!rewrite_rule_parse(srv, li_value_list_at(val, 0)->data.string, li_value_list_at(val, 1)->data.string, &rule, raw)) {
rewrite_free(NULL, rd);
return NULL;
}
@ -255,7 +284,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal
return NULL;
}
if (!rewrite_rule_parse(srv, li_value_list_at(v, 0)->data.string, li_value_list_at(v, 1)->data.string, &rule)) {
if (!rewrite_rule_parse(srv, li_value_list_at(v, 0)->data.string, li_value_list_at(v, 1)->data.string, &rule, raw)) {
rewrite_free(NULL, rd);
return NULL;
}
@ -264,7 +293,7 @@ static liAction* rewrite_create(liServer *srv, liWorker *wrk, liPlugin* p, liVal
LI_VALUE_END_FOREACH()
}
return li_action_new_function(rewrite, NULL, rewrite_free, rd);
return li_action_new_function(raw ? rewrite_raw : rewrite, NULL, rewrite_free, rd);
}

12
tests/t-mod-proxy.py

@ -14,8 +14,8 @@ self_proxy;
# need vhost for next test
class TestEncodedURL(CurlRequest):
URL = "/some%2Ffile"
EXPECT_RESPONSE_BODY = "/dest%2Ffile"
URL = "/some%2Ffile?abc"
EXPECT_RESPONSE_BODY = "/dest%2Ffile?abc"
EXPECT_RESPONSE_CODE = 200
no_docroot = True
config = """
@ -25,8 +25,8 @@ respond 200 => "%{req.raw_path}";
# backend gets encoded %2F and rewrites again
class TestProxiedRewrittenEncodedURL(CurlRequest):
URL = "/foo%2Ffile"
EXPECT_RESPONSE_BODY = "/dest%2Ffile"
URL = "/foo%2Ffile?abc"
EXPECT_RESPONSE_BODY = "/dest%2Ffile?abc"
EXPECT_RESPONSE_CODE = 200
no_docroot = True
config = """
@ -37,8 +37,8 @@ self_proxy;
# backend gets decoded %2F and doesn't rewrite again
class TestProxiedRewrittenDecodedURL(CurlRequest):
URL = "/foo%2Ffile"
EXPECT_RESPONSE_BODY = "/some/file"
URL = "/foo%2Ffile?abc"
EXPECT_RESPONSE_BODY = "/some/file?abc"
EXPECT_RESPONSE_CODE = 200
no_docroot = True
config = """

11
tests/t-rewrite.py

@ -53,6 +53,16 @@ rewrite_raw "(/http://some%2F.*)" => "/dest$1";
respond 200 => "%{req.raw_path}";
"""
# raw match; the rewrite target places the matched raw path into the query string
class TestRewrite6(CurlRequest):
URL = "/http://some%2Ffile"
EXPECT_RESPONSE_BODY = "/http://some%2Ffile"
EXPECT_RESPONSE_CODE = 200
config = """
rewrite_raw "(/http://some%2F.*)" => "/dest?$1";
respond 200 => "%{req.query}";
"""
class Test(GroupTest):
plain_config = """
setup { module_load "mod_rewrite"; }
@ -64,4 +74,5 @@ setup { module_load "mod_rewrite"; }
TestRewrite3,
TestRewrite4,
TestRewrite5,
TestRewrite6,
]
Loading…
Cancel
Save