From 0a61fdecacd6196f44bde9a59a3d7fb6f8351cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20B=C3=BChler?= Date: Fri, 25 Mar 2016 16:58:16 +0000 Subject: [PATCH] [buffer] refactor buffer_path_simplify (fixes #2560) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There actually was one bug: if the input consisted only of spaces, it would read one byte too many. `pre` was split into `pre2` and (already existing) `pre1` - the two characters which were read before the current one in `c`. Restructuring the loop eliminated some code before the loop, which was similar to the one at the end of the loop. From: Stefan Bühler git-svn-id: svn://svn.lighttpd.net/lighttpd/branches/lighttpd-1.4.x@3120 152afb58-edef-0310-8abb-c4023f1b3aa9 --- NEWS | 1 + src/buffer.c | 74 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/NEWS b/NEWS index f0b521e5..ca2923b9 100644 --- a/NEWS +++ b/NEWS @@ -46,6 +46,7 @@ NEWS * [core] replace array weakref with vector * [base64] fix crash due to broken force_assert * [unittests] add test_buffer and test_base64 unit tests + * [buffer] refactor buffer_path_simplify (fixes #2560) - 1.4.39 - 2016-01-02 * [core] fix memset_s call (fixes #2698) diff --git a/src/buffer.c b/src/buffer.c index 64946562..7afbedc2 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -884,9 +884,15 @@ void buffer_urldecode_query(buffer *url) { buffer_urldecode_internal(url, 1); } -/* Remove "/../", "//", "/./" parts from path, - * strips leading spaces, - * prepends "/" if not present already +/* - special case: empty string returns empty string + * - on windows or cygwin: replace \ with / + * - strip leading spaces + * - prepends "/" if not present already + * - resolve "/../", "//" and "/./" the usual way: + * the first one removes a preceding component, the other two + * get compressed to "/". + * - "/." and "/.." 
at the end are similar, but always leave a trailing + * "/" * * /blah/.. gets / * /blah/../foo gets /foo @@ -899,10 +905,9 @@ void buffer_urldecode_query(buffer *url) { void buffer_path_simplify(buffer *dest, buffer *src) { - int toklen; - char c, pre1; + /* current character, the one before, and the one before that from input */ + char c, pre1, pre2; char *start, *slash, *walk, *out; - unsigned short pre; force_assert(NULL != dest && NULL != src); @@ -935,53 +940,60 @@ void buffer_path_simplify(buffer *dest, buffer *src) out = dest->ptr; slash = dest->ptr; - + /* skip leading spaces */ while (*walk == ' ') { walk++; } - pre1 = *(walk++); - c = *(walk++); - pre = pre1; - if (pre1 != '/') { - pre = ('/' << 8) | pre1; + pre2 = pre1 = 0; + c = *(walk++); + /* prefix with '/' if not already present */ + if (c != '/') { + pre1 = '/'; *(out++) = '/'; } - *(out++) = pre1; - if (pre1 == '\0') { - dest->used = (out - start) + 1; - return; - } + while (c != '\0') { + /* assert((src != dest || out <= walk) && slash <= out); */ + /* the following comments about out and walk are only interesting if + * src == dest; otherwise the memory areas don't overlap anyway. + */ + pre2 = pre1; + pre1 = c; + + /* possibly: out == walk - need to read first */ + c = *walk; + *out = pre1; + + out++; + walk++; + /* (out <= walk) still true; also now (slash < out) */ - for (;;) { if (c == '/' || c == '\0') { - toklen = out - slash; - if (toklen == 3 && pre == (('.' << 8) | '.')) { + const size_t toklen = out - slash; + if (toklen == 3 && pre2 == '.' && pre1 == '.') { + /* "/../" or ("/.." 
at end of string) */ out = slash; + /* if there is something before "/..", there is at least one + * component, which needs to be removed */ if (out > start) { out--; while (out > start && *out != '/') out--; } + /* don't kill trailing '/' at end of path */ if (c == '\0') out++; - } else if (toklen == 1 || pre == (('/' << 8) | '.')) { + /* slash < out before, so out_new <= slash + 1 <= out_before <= walk */ + } else if (toklen == 1 || (pre2 == '/' && pre1 == '.')) { + /* "//" or "/./" or (("/" or "/.") at end of string) */ out = slash; + /* don't kill trailing '/' at end of path */ if (c == '\0') out++; + /* slash < out before, so out_new <= slash + 1 <= out_before <= walk */ } slash = out; } - - if (c == '\0') break; - - pre1 = c; - pre = (pre << 8) | pre1; - c = *walk; - *out = pre1; - - out++; - walk++; } buffer_string_set_length(dest, out - start);