[core] http_range.[ch] RFC 7233 Range handling

(import from one of my development branches from 2015)
This commit is contained in:
Glenn Strauss 2021-02-25 21:42:09 -05:00
parent ce74705cea
commit d68e639b71
6 changed files with 476 additions and 1 deletions

View File

@ -793,6 +793,7 @@ add_executable(lighttpd
sock_addr_cache.c
ls-hpack/lshpack.c
algo_xxhash.c
http_range.c
network.c
network_write.c
data_config.c

View File

@ -97,6 +97,7 @@ src = server.c response.c connections.c h2.c reqpool.c \
network_write.c \
ls-hpack/lshpack.c \
algo_xxhash.c \
http_range.c \
data_config.c \
vector.c \
configfile.c configparser.c
@ -464,7 +465,8 @@ hdr = base64.h buffer.h burl.h network.h log.h http_kv.h keyvalue.h \
first.h http_chunk.h \
algo_hmac.h \
algo_md.h algo_md5.h algo_sha1.h algo_splaytree.h algo_xxhash.h \
http_auth.h http_date.h http_header.h http_vhostdb.h stream.h \
http_auth.h http_date.h http_header.h http_range.h http_vhostdb.h \
stream.h \
fdevent.h gw_backend.h connections.h base.h base_decls.h stat_cache.h \
plugin.h plugin_config.h \
http_etag.h array.h vector.h \

View File

@ -82,6 +82,7 @@ src = Split("server.c response.c connections.c h2.c reqpool.c \
sock_addr_cache.c \
ls-hpack/lshpack.c \
algo_xxhash.c \
http_range.c \
network.c \
network_write.c \
data_config.c \

453
src/http_range.c Normal file
View File

@ -0,0 +1,453 @@
/*
* http_range - HTTP Range (RFC 7233)
*
* Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com All rights reserved
* License: BSD 3-clause (same as lighttpd)
*/
#include "http_range.h"
#include <limits.h>
#include <stdlib.h> /* strtol(), strtoll() */
#include <string.h> /* memmove() */
#include <strings.h> /* strcasecmp(), strncasecmp() */
#include "buffer.h"
#include "chunk.h"
#include "http_header.h"
#include "request.h"
/* arbitrary limit for max num ranges (additional ranges are ignored) */
#undef RMAX
#define RMAX 10
/* RFC 7233 Hypertext Transfer Protocol (HTTP/1.1): Range Requests
* https://tools.ietf.org/html/rfc7233
* Range requests are an OPTIONAL feature of HTTP, designed so that recipients
* not implementing this feature (or not supporting it for the target resource)
* can respond as if it is a normal GET request without impacting
* interoperability. */
__attribute_noinline__
static int
http_range_coalesce (off_t * const restrict ranges, int n)
{
/* coalesce/combine overlapping ranges and ranges separated by a
* gap which is smaller than the overhead of sending multiple parts
* (typically around 80 bytes) ([RFC7233] 4.1 206 Partial Content)
* (ranges are known to be positive, so subtract 80 instead of add 80
* to avoid any chance of integer overflow)
* (n is limited to RMAX ranges (RMAX pairs of off_t) since a malicious set
* of ranges has n^2 cost for this simplistic algorithm)
* (sorting the ranges and then combining would lower the cost, but the
* cost should not be an issue since client should not send many ranges
* and we restrict the max number of ranges to limit abuse)
* [RFC7233] 4.1 206 Partial Content recommends:
* When a multipart response payload is generated, the server SHOULD send
* the parts in the same order that the corresponding byte-range-spec
* appeared in the received Range header field, excluding those ranges
* that were deemed unsatisfiable or that were coalesced into other ranges
*/
for (int i = 0; i+2 < n; i += 2) {
const off_t b = ranges[i];
const off_t e = ranges[i+1];
for (int j = i+2; j < n; j += 2) {
/* common case: ranges do not overlap */
if (b <= ranges[j] ? e < ranges[j]-80 : ranges[j+1] < b-80)
continue;
/* else ranges do overlap, so combine into first range */
ranges[i] = b <= ranges[j] ? b : ranges[j];
ranges[i+1] = e >= ranges[j+1] ? e : ranges[j+1];
memmove(ranges+j, ranges+j+2, (n-j-2)*sizeof(off_t));
/* restart outer loop from beginning */
n -= 2;
i = -2;
break;
}
}
return n;
}
static const char *
http_range_parse_next (const char * restrict s, const off_t len,
off_t * const restrict ranges)
{
/*(caller must check retured ranges[1] != -1, or else range was invalid)*/
/*assert(len > 0);*//*(caller must ensure len > 0)*/
char *e;
off_t n = strtoll(s, &e, 10);
ranges[1] = -1; /* invalid */
if (n >= 0) {
if (n != LLONG_MAX && n < len && s != e) {
ranges[0] = n;
while (*e == ' ' || *e == '\t') ++e;
if (*e == '-') {
n = strtoll((s = e+1), &e, 10);
if (s == e || (n == 0 && e[-1] != '0'))
ranges[1] = len-1;
else if (ranges[0] <= n && n != LLONG_MAX)
ranges[1] = n < len ? n : len-1;
}
}
}
else if (n != LLONG_MIN) {
ranges[0] = len > -n ? len + n : 0;/*('n' is negative here)*/
ranges[1] = len-1;
}
while (*e == ' ' || *e == '\t') ++e;
return e; /* ',' or '\0' or else invalid char in range request */
}
static int
http_range_parse (const char * restrict s, const off_t content_length,
off_t ranges[RMAX*2])
{
/* [RFC7233] 2.1 Byte Ranges
* If a valid byte-range-set includes at least one byte-range-spec
* with a first-byte-pos that is less than the current length of
* the representation, or at least one suffix-byte-range-spec with
* a non-zero suffix-length, then the byte-range-set is satisfiable.
* Otherwise, the byte-range-set is unsatisfiable.
*
* [RFC7233] 3.1 Range
* A server that supports range requests MAY ignore or reject a Range
* header field that consists of more than two overlapping ranges, or a
* set of many small ranges that are not listed in ascending order,
* since both are indications of either a broken client or a deliberate
* denial-of-service attack (Section 6.1). A client SHOULD NOT request
* multiple ranges that are inherently less efficient to process and
* transfer than a single range that encompasses the same data.
*/
int n = 0;
do {
s = http_range_parse_next(s, content_length, ranges+n);
if ((*s == '\0' || *s == ',') && ranges[n+1] != -1)
n += 2;
else
while (*s != '\0' && *s != ',') ++s; /*ignore invalid ranges*/
} while (*s++ != '\0' && n < RMAX*2);
/*(*s++ for multipart, increment to char after ',')*/
if (n <= 2)
return n;
/* error if n == 0 (no valid ranges)
* (if n == RMAX*2 (additional ranges > RMAX limit, if any, were ignored))*/
return http_range_coalesce(ranges, n);
}
static void
http_range_single (request_st * const r, const off_t ranges[2])
{
/* caller already checked: n == 2, content_length > 0, ranges valid */
chunkqueue * const restrict cq = &r->write_queue;
const off_t complete_length = chunkqueue_length(cq);
/* add Content-Range header */
/*(large enough for "bytes X-X/X" with 3 huge numbers)*/
uint32_t len = sizeof("bytes ")-1;
char cr[72] = "bytes ";
len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[0]);
cr[len++] = '-';
len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, ranges[1]);
cr[len++] = '/';
len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, complete_length);
http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
CONST_STR_LEN("Content-Range"), cr, len);
if (cq->first == cq->last) { /* single chunk in cq */
/* consume from cq to start of range, truncate after end of range */
if (ranges[0]) {
chunkqueue_mark_written(cq, ranges[0]);
cq->bytes_out -= ranges[0];
cq->bytes_in -= ranges[0];
}
cq->bytes_in -= complete_length - (ranges[1] + 1);
chunk * const c = cq->first;
if (c->type == FILE_CHUNK)
c->file.length = c->offset + ranges[1] - ranges[0] + 1;
else /*(c->type == MEM_CHUNK)*/
c->mem->used = c->offset + ranges[1] - ranges[0] + 1 + 1;
}
else {
/* transfer contents to temporary cq, then transfer range back */
chunkqueue tq;
memset(&tq, 0, sizeof(tq));
chunkqueue_steal(&tq, cq, complete_length);
cq->bytes_out -= complete_length;
cq->bytes_in -= complete_length;
chunkqueue_mark_written(&tq, ranges[0]);
chunkqueue_steal(cq, &tq, ranges[1] - ranges[0] + 1);
chunkqueue_reset(&tq);
}
}
__attribute_cold__
static void
http_range_multi (request_st * const r,
const off_t ranges[RMAX*2], const int n)
{
/* multiple ranges that are not ordered are not expected to be common,
* so those scenarios is not optimized here */
#define HTTP_MULTIPART_BOUNDARY "fkj49sn38dcn3"
static const char boundary_prefix[] =
"\r\n--" HTTP_MULTIPART_BOUNDARY;
static const char boundary_end[] =
"\r\n--" HTTP_MULTIPART_BOUNDARY "--\r\n";
static const char multipart_type[] =
"multipart/byteranges; boundary=" HTTP_MULTIPART_BOUNDARY;
buffer * const tb = r->tmp_buf;
buffer_copy_string_len(tb, CONST_STR_LEN(boundary_prefix));
const buffer * const content_type =
http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
CONST_STR_LEN("Content-Type"));
if (content_type) {
buffer_append_string_len(tb, CONST_STR_LEN("\r\nContent-Type: "));
buffer_append_string_len(tb, CONST_BUF_LEN(content_type));
}
buffer_append_string_len(tb,CONST_STR_LEN("\r\nContent-Range: bytes "));
const uint32_t prefix_len = buffer_string_length(tb);
http_header_response_set(r, HTTP_HEADER_CONTENT_TYPE,
CONST_STR_LEN("Content-Type"),
CONST_STR_LEN(multipart_type));
/* caller already checked: n > 2, content_length > 0, ranges valid */
chunkqueue * const restrict cq = &r->write_queue;
const off_t complete_length = chunkqueue_length(cq);
/* copy chunks for ranges to end of cq, then consume original chunks
*
* future: if ranges ordered, could use technique in http_range_single(),
* but [RFC7233] 4.1 206 Partial Content recommends:
* When a multipart response payload is generated, the server SHOULD send
* the parts in the same order that the corresponding byte-range-spec
* appeared in the received Range header field, excluding those ranges
* that were deemed unsatisfiable or that were coalesced into other ranges
* and this code path is not expected to be hot, and so not optimized.
*/
chunk * const c = (cq->first == cq->last && cq->first->type == MEM_CHUNK)
? cq->first
: NULL;
for (int i = 0; i < n; i += 2) {
/* generate boundary-header including Content-Type and Content-Range */
buffer_string_set_length(tb, prefix_len);
buffer_append_int(tb, ranges[i]);
buffer_append_string_len(tb, CONST_STR_LEN("-"));
buffer_append_int(tb, ranges[i+1]);
buffer_append_string_len(tb, CONST_STR_LEN("/"));
buffer_append_int(tb, complete_length);
buffer_append_string_len(tb, CONST_STR_LEN("\r\n\r\n"));
if (c) /* single MEM_CHUNK in original cq; not using mem_min */
chunkqueue_append_mem(cq, CONST_BUF_LEN(tb));
else
chunkqueue_append_mem_min(cq, CONST_BUF_LEN(tb));
chunkqueue_append_cq_range(cq, cq, ranges[i],
ranges[i+1] - ranges[i] + 1);
}
/* add boundary end */
chunkqueue_append_mem_min(cq, CONST_STR_LEN(boundary_end));
/* remove initial chunk(s), since duplicated into multipart ranges */
/* remove initial "\r\n" in front of first boundary string */
chunkqueue_mark_written(cq, complete_length+2);
cq->bytes_out -= complete_length+2;
cq->bytes_in -= complete_length+2;
}
__attribute_cold__
static int
http_range_not_satisfiable (request_st * const r, const off_t content_length)
{
/*(large enough for "bytes '*'/X" with 1 huge number)*/
uint32_t len = sizeof("bytes */")-1;
char cr[32] = "bytes */";
len += (uint32_t)li_itostrn(cr+len, sizeof(cr)-len, content_length);
http_header_response_set(r, HTTP_HEADER_CONTENT_RANGE,
CONST_STR_LEN("Content-Range"), cr, len);
r->handler_module = NULL;
return (r->http_status = 416); /* Range Not Satisfiable */
}
__attribute_noinline__
static int
http_range_process (request_st * const r, const buffer * const http_range)
{
const off_t content_length = chunkqueue_length(&r->write_queue);
if (0 == content_length) /*(implementation detail; see comment at top)*/
return r->http_status; /* skip Range handling if empty payload */
/* future: might skip Range if content_length is below threshold, e.g. 1k */
/* An origin server MUST ignore a Range header field that contains a
* range unit it does not understand. */
if (0 != strncasecmp(http_range->ptr, "bytes=", sizeof("bytes=")-1))
return r->http_status; /* 200 OK */
/* arbitrary limit: support up to RMAX ranges in request Range field
* (validating and coalescing overlapping ranges is not a linear algorithm)
* (use RMAX pairs of off_t to indicate too many ranges ( >= RMAX*2)) */
off_t ranges[RMAX*2];
const int n = http_range_parse(http_range->ptr+sizeof("bytes=")-1,
content_length, ranges);
/* checked above: content_length > 0, ranges valid */
if (2 == n) /* single range */
http_range_single(r, ranges);
else if (0 == n) /* 416 Range Not Satisfiable */
return http_range_not_satisfiable(r, content_length);
else /* multipart ranges */
http_range_multi(r, ranges, n);
/*(must either set Content-Length or unset prior value, if any)*/
buffer * const tb = r->tmp_buf;
buffer_clear(tb);
buffer_append_int(tb, chunkqueue_length(&r->write_queue));
http_header_response_set(r, HTTP_HEADER_CONTENT_LENGTH,
CONST_STR_LEN("Content-Length"),
CONST_BUF_LEN(tb));
return (r->http_status = 206); /* 206 Partial Content */
}
int
http_range_rfc7233 (request_st * const r)
{
const int http_status = r->http_status;
/* implementation limitation:
* limit range handling to when we have complete response
* (future: might extend this to streamed files if Content-Length known)
* (otherwise, might be unable to validate Range before send response header
* e.g. unable to handle suffix-byte-range-spec without entity length)*/
if (!r->resp_body_finished)
return http_status;
/* limit range handling to 200 responses
* [RFC7233] 3.1 Range
* The Range header field is evaluated after evaluating the precondition
* header fields defined in [RFC7232], and only if the result in absence
* of the Range header field would be a 200 (OK) response. */
if (200 != http_status)
return http_status;
/* limit range handling to GET and HEAD (further limited below to GET)
* [RFC7233] 3.1 Range
* A server MUST ignore a Range header field received with a request
* method other than GET.
*/
if (!http_method_get_or_head(r->http_method))
return http_status;
/* no "Range" in HTTP/1.0 */
if (r->http_version < HTTP_VERSION_1_1)
return http_status;
/* do not attempt to handle range if Transfer-Encoding already applied.
* skip Range processing if Content-Encoding has already been applied,
* since Range is on the unencoded content length and Content-Encoding
* might change content. This includes if mod_deflate has applied
* Content-Encoding. If Transfer-Encoding: gzip, chunked were used
* instead (not done), then Range processing could safely take place, too,
* (but mod_deflate would need to run from new hook to handle TE: gzip).
* Alternatively, lighttpd.conf could be configured to disable mod_deflate
* for Range requests:
* $REQUEST_HEADER["Range"] != "" { deflate.mimetypes = () }
*/
if ((r->resp_htags
& (light_bshift(HTTP_HEADER_TRANSFER_ENCODING)
|light_bshift(HTTP_HEADER_CONTENT_ENCODING))))
return http_status;
#if 0 /*(if Range already handled, HTTP status expected to be 206, not 200)*/
/* check if Range request already handled (single range or multipart)*/
if (light_btst(r->resp_htags, HTTP_HEADER_CONTENT_RANGE))
return http_status;
const buffer * const content_type =
http_header_response_get(r, HTTP_HEADER_CONTENT_TYPE,
CONST_STR_LEN("Content-Type"));
if (content_type
&& 0 == strncasecmp(content_type->ptr,
"multipart/byteranges",
sizeof("multipart/byteranges")-1))
return http_status;
#endif
/* optional: advertise Accept-Ranges: bytes
* Even if "Accept-Ranges: bytes" is not given,
* [RFC7233] 2.3 Accept-Ranges
* A client MAY generate range requests without having received this
* header for the resource involved.
*/
if (!light_btst(r->resp_htags, HTTP_HEADER_ACCEPT_RANGES))
http_header_response_set(r, HTTP_HEADER_ACCEPT_RANGES,
CONST_STR_LEN("Accept-Ranges"),
CONST_STR_LEN("bytes"));
else {
const buffer * const accept_ranges =
http_header_response_get(r, HTTP_HEADER_ACCEPT_RANGES,
CONST_STR_LEN("Accept-Ranges"));
if (buffer_eq_slen(accept_ranges, CONST_STR_LEN("none")))
return http_status;
}
/* limit range handling to GET
* [RFC7233] 3.1 Range
* A server MUST ignore a Range header field received with a request
* method other than GET.
*/
if (r->http_method != HTTP_METHOD_GET)
return http_status;
/* check for Range request */
const buffer * const http_range =
http_header_request_get(r, HTTP_HEADER_RANGE, CONST_STR_LEN("Range"));
if (!http_range)
return http_status;
/* [RFC7233] 3.2 If-Range
* If-Range = entity-tag / HTTP-date
* A client MUST NOT generate an If-Range header field containing an
* entity-tag that is marked as weak.
* [...]
* Note that this comparison by exact match, including when the validator
* is an HTTP-date, differs from the "earlier than or equal to" comparison
* used when evaluating an If-Unmodified-Since conditional. */
if (light_btst(r->rqst_htags, HTTP_HEADER_IF_RANGE)) {
const buffer * const if_range =
http_header_request_get(r, HTTP_HEADER_IF_RANGE,
CONST_STR_LEN("If-Range"));
/* (weak ETag W/"<etag>" will not match Last-Modified) */
const buffer * const cmp = (if_range->ptr[0] == '"')
? http_header_response_get(r, HTTP_HEADER_ETAG,
CONST_STR_LEN("ETag"))
: http_header_response_get(r, HTTP_HEADER_LAST_MODIFIED,
CONST_STR_LEN("Last-Modified"));
if (!cmp || !buffer_is_equal(if_range, cmp))
return http_status;
#if 0 /*(questionable utility; not deemed worthwhile)*/
/* (In a modular server, the following RFC recommentation might be
* expensive and invasive to implement perfectly, so only making an
* effort here to comply with known headers added within this routine
* and within the purview of Range requests)
* [RFC7233] 4.1 206 Partial Content
* If a 206 is generated in response to a request with an If-Range
* header field, the sender SHOULD NOT generate other representation
* header fields beyond those required above, because the client is
* understood to already have a prior response containing those header
* fields.
*/
http_header_response_unset(r, HTTP_HEADER_ACCEPT_RANGES,
CONST_STR_LEN("Accept-Ranges"));
#endif
}
return http_range_process(r, http_range);
}

17
src/http_range.h Normal file
View File

@ -0,0 +1,17 @@
/*
* http_range - HTTP Range (RFC 7233)
*
* Copyright(c) 2015,2021 Glenn Strauss gstrauss()gluelogic.com All rights reserved
* License: BSD 3-clause (same as lighttpd)
*/
#ifndef INCLUDED_RANGE_H
#define INCLUDED_RANGE_H
#include "first.h"
#include "base_decls.h"
int http_range_rfc7233 (request_st *r);
#endif

View File

@ -757,6 +757,7 @@ main_src = [
'sock_addr_cache.c',
'ls-hpack/lshpack.c',
'algo_xxhash.c',
'http_range.c',
'network_write.c',
'network.c',
'reqpool.c',