[core] simplify buffer_path_simplify()

This commit is contained in:
Glenn Strauss 2021-05-08 14:34:05 -04:00
parent b2f4c00784
commit 980554bc70
10 changed files with 82 additions and 99 deletions

View File

@ -781,97 +781,83 @@ int buffer_is_valid_UTF8(const buffer *b) {
* /blah/../foo gets /foo
* /abc/./xyz gets /abc/xyz
* /abc//xyz gets /abc/xyz
*
* NOTE: src and dest can point to the same buffer, in which case,
* the operation is performed in-place.
*/
/* NOTE(review): this listing is a rendered diff without +/- markers, so lines
 * of the previous two-argument version (dest, src) and of the new
 * single-argument version (b) appear interleaved below.
 *
 * New version: rewrites the path stored in b in place, collapsing repeated
 * '/' and resolving "./" and "../" segments, then updates b->used to match
 * the shortened string. */
void buffer_path_simplify(buffer *dest, buffer *src)
void buffer_path_simplify(buffer *b)
{
/* current character, the one before, and the one before that from input */
char c, pre1, pre2;
char *start, *slash, *walk, *out;
/* empty input (hinted as the unlikely case): store "" in b and stop */
if (__builtin_expect( (buffer_string_is_empty(b)), 0)) {
buffer_copy_string_len(b, CONST_STR_LEN(""));
return;
}
/* NOTE(review): same empty-input check written against src/dest; appears to
 * be the previous version's form of the check above */
if (buffer_string_is_empty(src)) {
buffer_copy_string_len(dest, CONST_STR_LEN(""));
return;
}
#if defined(__WIN32) || defined(__CYGWIN__)
/* cygwin treats \ and / the same, so we have to do that too */
for (char *p = b->ptr; *p; p++) {
if (*p == '\\') *p = '/';
}
#endif
/* NOTE(review): src-based check that the last counted byte is '\0'; appears
 * to belong to the previous version */
force_assert('\0' == src->ptr[src->used-1]);
/* out tracks the last character kept in the result (advanced with *++out) */
char *out = b->ptr;
/* end addresses the last counted byte of the buffer (index b->used - 1);
 * it is temporarily set to '/' here and reset to '\0' before returning */
char * const end = b->ptr + b->used - 1;
*end = '/'; /*(end of path modified to avoid need to check '\0')*/
#if defined(__WIN32) || defined(__CYGWIN__)
/* cygwin treats \ and / the same, so we have to do that too */
{
char *p;
for (p = src->ptr; *p; p++) {
if (*p == '\\') *p = '/';
}
}
#endif
/* walk is the read cursor; it starts at the beginning of the buffer */
char *walk = out;
/* path does not start with '/': deal with the first segment separately */
if (__builtin_expect( (*walk != '/'), 0)) {
/* leading "./": advance walk to the '/' and store it at the start */
if (walk[0] == '.' && walk[1] == '/')
*out = *++walk;
/* leading "../": same, stepping over both dots */
else if (walk[0] == '.' && walk[1] == '.' && walk[2] == '/')
*out = *(walk += 2);
else {
/* any other first segment stays where it is: skip to the '/' that
 * ends it and continue the output from that position */
while (*++walk != '/') ;
out = walk;
}
}
/* walk is on a '/' here; step past it to the start of the next segment */
++walk;
/* NOTE(review): cursor setup using src/dest; appears to belong to the
 * previous version */
walk = src->ptr;
start = dest->ptr;
out = dest->ptr;
slash = dest->ptr;
while (walk <= end) {
/* previous char is '/' at this point (or start of string w/o '/') */
if (__builtin_expect( (walk[0] == '/'), 0)) {
/* skip repeated '/' (e.g. "///" -> "/") */
if (++walk < end)
continue;
else {
/* input exhausted: step out past the last kept '/' so that the
 * terminator written after the loop does not overwrite it */
++out;
break;
}
}
else if (__builtin_expect( (walk[0] == '.'), 0)) {
/* handle "./" and "../" */
if (walk[1] == '.' && walk[2] == '/') {
/* handle "../" */
/* back out to the '/' preceding the last kept segment, or to the
 * start of the buffer if there is none */
while (out > b->ptr && *--out != '/') ;
*out = '/'; /*(in case path had not started with '/')*/
if ((walk += 3) >= end) {
/* "../" was the final segment: keep the '/' at out */
++out;
break;
}
else
continue;
}
else if (walk[1] == '/') {
/* handle "./" */
if ((walk += 2) >= end) {
/* "./" was the final segment: keep the '/' at out */
++out;
break;
}
continue;
}
else {
/* accept "." if not part of "../" or "./" */
*++out = '.';
++walk;
}
}
/* NOTE(review): from here down to the buffer_string_set_length(dest, ...)
 * call, the lines use src/dest/start/slash/pre1/pre2 and appear to be the
 * body of the previous version */
/* skip leading spaces */
while (*walk == ' ') {
walk++;
}
if (*walk == '.') {
if (walk[1] == '/' || walk[1] == '\0')
++walk;
else if (walk[1] == '.' && (walk[2] == '/' || walk[2] == '\0'))
walk+=2;
}
pre1 = 0;
c = *(walk++);
while (c != '\0') {
/* assert((src != dest || out <= walk) && slash <= out); */
/* the following comments about out and walk are only interesting if
* src == dest; otherwise the memory areas don't overlap anyway.
*/
pre2 = pre1;
pre1 = c;
/* possibly: out == walk - need to read first */
c = *walk;
*out = pre1;
out++;
walk++;
/* (out <= walk) still true; also now (slash < out) */
if (c == '/' || c == '\0') {
const size_t toklen = out - slash;
if (toklen == 3 && pre2 == '.' && pre1 == '.' && *slash == '/') {
/* "/../" or ("/.." at end of string) */
out = slash;
/* if there is something before "/..", there is at least one
* component, which needs to be removed */
if (out > start) {
out--;
while (out > start && *out != '/') out--;
}
/* don't kill trailing '/' at end of path */
if (c == '\0') out++;
/* slash < out before, so out_new <= slash + 1 <= out_before <= walk */
} else if (toklen == 1 || (pre2 == '/' && pre1 == '.')) {
/* "//" or "/./" or (("/" or "/.") at end of string) */
out = slash;
/* don't kill trailing '/' at end of path */
if (c == '\0') out++;
/* slash < out before, so out_new <= slash + 1 <= out_before <= walk */
}
slash = out;
}
}
buffer_string_set_length(dest, out - start);
/* copy the current segment up to and including the '/' that ends it;
 * out is left on that '/' */
while ((*++out = *walk++) != '/') ;
}
*out = *end = '\0'; /* overwrite extra '/' added to end of path */
/* used = string length plus one for the terminating '\0' stored at out */
b->used = (out - b->ptr) + 1;
/*buffer_string_set_length(b, out - b->ptr);*/
}
void buffer_to_lower(buffer * const b) {

View File

@ -216,7 +216,7 @@ __attribute_pure__
int buffer_is_valid_UTF8(const buffer *b);
__attribute_nonnull__
void buffer_path_simplify(buffer *dest, buffer *src);
void buffer_path_simplify(buffer *b);
__attribute_nonnull__
void buffer_to_lower(buffer *b);

View File

@ -311,7 +311,7 @@ static int burl_normalize_path (buffer *b, buffer *t, int qs, int flags)
buffer_string_set_length(b, qs);
}
buffer_path_simplify(b, b);
buffer_path_simplify(b);
if (qs >= 0) {
qs = (int)buffer_string_length(b);

View File

@ -1464,7 +1464,7 @@ int gw_set_defaults_backend(server *srv, gw_plugin_data *p, const array *a, gw_p
"'/'; invalid: \"%s\"", ds->value.ptr);
goto error;
}
buffer_path_simplify(&ds->value, &ds->value);
buffer_path_simplify(&ds->value);
buffer_append_slash(&ds->value);
}
}

View File

@ -443,7 +443,7 @@ static void http_response_xsendfile (request_st * const r, buffer * const path,
}
return;
}
buffer_path_simplify(path, path);
buffer_path_simplify(path);
if (r->conf.force_lowercase_filenames) {
buffer_to_lower(path);
}
@ -525,7 +525,7 @@ static void http_response_xsendfile2(request_st * const r, const buffer * const
r->http_status = 502;
break;
}
buffer_path_simplify(b, b);
buffer_path_simplify(b);
if (r->conf.force_lowercase_filenames) {
buffer_to_lower(b);
}

View File

@ -238,7 +238,7 @@ SETDEFAULTS_FUNC(mod_cgi_set_defaults) {
cpk[cpv->k_id].k, ds->value.ptr);
return HANDLER_ERROR;
}
buffer_path_simplify(&ds->value, &ds->value);
buffer_path_simplify(&ds->value);
buffer_append_slash(&ds->value);
}
break;

View File

@ -500,7 +500,7 @@ static int process_ssi_stmt(request_st * const r, handler_ctx * const p, const c
"SSI invalid UTF-8 after url-decode: %s", tb->ptr);
break;
}
buffer_path_simplify(tb, tb);
buffer_path_simplify(tb);
char *sl = strrchr(r->physical.path.ptr, '/');
if (NULL == sl) break; /*(not expected)*/
buffer_copy_path_len2(p->stat_fn,
@ -524,7 +524,7 @@ static int process_ssi_stmt(request_st * const r, handler_ctx * const p, const c
"SSI invalid UTF-8 after url-decode: %s", tb->ptr);
break;
}
buffer_path_simplify(tb, tb);
buffer_path_simplify(tb);
/* we have an uri */

View File

@ -4828,7 +4828,7 @@ mod_webdav_copymove_b (request_st * const r, const plugin_config * const pconf,
http_status_set_error(r, 400);
return HANDLER_FINISHED;
}
buffer_path_simplify(dst_rel_path, dst_rel_path);
buffer_path_simplify(dst_rel_path);
if (buffer_string_is_empty(dst_rel_path) || dst_rel_path->ptr[0] != '/') {
http_status_set_error(r, 400);
return HANDLER_FINISHED;

View File

@ -936,7 +936,7 @@ int http_request_parse_target(request_st * const r, int scheme_port) {
*/
buffer_urldecode_path(&r->uri.path);
buffer_path_simplify(&r->uri.path, &r->uri.path);
buffer_path_simplify(&r->uri.path);
if (r->uri.path.ptr[0] != '/')
return http_request_header_line_invalid(r, 400,
"uri-path does not begin with '/' -> 400"); /* Bad Request */

View File

@ -9,8 +9,9 @@
static void run_buffer_path_simplify(buffer *psrc, buffer *pdest, const char *in, size_t in_len, const char *out, size_t out_len) {
buffer_copy_string_len(psrc, in, in_len);
pdest = psrc; /*(buffer_path_simplify() now takes only one arg)*/
buffer_path_simplify(pdest, psrc);
buffer_path_simplify(pdest);
if (!buffer_eq_slen(pdest, out, out_len)) {
fprintf(stderr,
@ -23,8 +24,7 @@ static void run_buffer_path_simplify(buffer *psrc, buffer *pdest, const char *in
fflush(stderr);
abort();
} else {
if (psrc != pdest) buffer_copy_buffer(psrc, pdest);
buffer_path_simplify(pdest, psrc);
buffer_path_simplify(pdest);
if (!buffer_eq_slen(pdest, out, out_len)) {
fprintf(stderr,
@ -70,14 +70,11 @@ static void test_buffer_path_simplify_with(buffer *psrc, buffer *pdest) {
/* Test driver for buffer_path_simplify(): allocates scratch buffers with
 * buffer_init(), passes them to test_buffer_path_simplify_with(), and
 * releases them with buffer_free().
 * NOTE(review): shown as part of a rendered diff without +/- markers; the
 * pdest lines may be ones this commit removes -- confirm against the repo. */
static void test_buffer_path_simplify(void) {
buffer *psrc = buffer_init();
buffer *pdest = buffer_init();
/* test using the same buffer, then using two different buffers */
test_buffer_path_simplify_with(psrc, psrc);
test_buffer_path_simplify_with(pdest, psrc);
buffer_free(psrc);
buffer_free(pdest);
}
static void test_buffer_to_lower_upper(void) {