diff --git a/SConstruct b/SConstruct index c3b415a5..639d387e 100644 --- a/SConstruct +++ b/SConstruct @@ -261,6 +261,7 @@ vars.AddVariables( PackageVariable('with_wolfssl', 'enable wolfSSL support', 'no'), BoolVariable('with_nettle', 'enable Nettle support', 'no'), BoolVariable('with_pam', 'enable PAM auth support', 'no'), + PackageVariable('with_pcre2', 'enable pcre2 support', 'no'), PackageVariable('with_pcre', 'enable pcre support', 'yes'), PackageVariable('with_pgsql', 'enable pgsql support', 'no'), PackageVariable('with_sasl', 'enable SASL support', 'no'), @@ -691,11 +692,16 @@ if 1: LIBPAM = 'pam', ) - if env['with_pcre']: + if env['with_pcre2']: + pcre2_config = autoconf.checkProgram('pcre2', 'pcre2-config') + if not autoconf.CheckParseConfigForLib('LIBPCRE', pcre2_config + ' --cflags --libs8'): + fail("Couldn't find pcre2") + autoconf.env.Append(CPPFLAGS = [ '-DHAVE_PCRE2_H', '-DHAVE_PCRE' ]) + elif env['with_pcre']: pcre_config = autoconf.checkProgram('pcre', 'pcre-config') if not autoconf.CheckParseConfigForLib('LIBPCRE', pcre_config + ' --cflags --libs'): fail("Couldn't find pcre") - autoconf.env.Append(CPPFLAGS = [ '-DHAVE_PCRE_H', '-DHAVE_LIBPCRE' ]) + autoconf.env.Append(CPPFLAGS = [ '-DHAVE_PCRE_H', '-DHAVE_PCRE' ]) if env['with_pgsql']: if not autoconf.CheckParseConfigForLib('LIBPGSQL', 'pkg-config libpq --cflags --libs'): diff --git a/configure.ac b/configure.ac index bd9c4517..a4a9da74 100644 --- a/configure.ac +++ b/configure.ac @@ -902,6 +902,37 @@ if test "x$use_nss" = "xyes"; then fi +dnl pcre2 support +AC_MSG_NOTICE([----------------------------------------]) +AC_MSG_CHECKING([for perl regular expressions support]) +AC_ARG_WITH([pcre2], + [AS_HELP_STRING([--with-pcre2], [Enable pcre2 support (default no)])], + [WITH_PCRE2=$withval], + [WITH_PCRE2=no] +) +AC_MSG_RESULT([$WITH_PCRE2]) + +if test "$WITH_PCRE2" != no; then + if test "$WITH_PCRE2" != yes; then + PCRE_LIB="-L$WITH_PCRE2/lib -lpcre2-8" + CPPFLAGS="$CPPFLAGS -I$WITH_PCRE2/include" 
else + AC_PATH_PROG([PCRE2CONFIG], [pcre2-config]) + if test -n "$PCRE2CONFIG"; then + PCRE_LIB=`"$PCRE2CONFIG" --libs8` + CPPFLAGS="$CPPFLAGS `"$PCRE2CONFIG" --cflags`" + fi + fi + + if test -z "$PCRE_LIB"; then + AC_MSG_ERROR([pcre2-config not found, install the pcre2-devel package or build with --without-pcre2]) + fi + + AC_DEFINE([HAVE_PCRE], [1], [libpcre2-8]) + AC_DEFINE([HAVE_PCRE2_H], [1], [pcre2.h]) + AC_SUBST([PCRE_LIB]) +fi + dnl pcre support AC_MSG_NOTICE([----------------------------------------]) AC_MSG_CHECKING([for perl regular expressions support]) @@ -912,7 +943,7 @@ AC_ARG_WITH([pcre], ) AC_MSG_RESULT([$WITH_PCRE]) -if test "$WITH_PCRE" != no; then +if test "$WITH_PCRE" != no && test "$WITH_PCRE2" = "no"; then if test "$WITH_PCRE" != yes; then PCRE_LIB="-L$WITH_PCRE/lib -lpcre" CPPFLAGS="$CPPFLAGS -I$WITH_PCRE/include" @@ -928,7 +959,7 @@ if test "$WITH_PCRE" != no; then AC_MSG_ERROR([pcre-config not found, install the pcre-devel package or build with --without-pcre]) fi - AC_DEFINE([HAVE_LIBPCRE], [1], [libpcre]) + AC_DEFINE([HAVE_PCRE], [1], [libpcre]) AC_DEFINE([HAVE_PCRE_H], [1], [pcre.h]) AC_SUBST([PCRE_LIB]) fi diff --git a/meson_options.txt b/meson_options.txt index f6687159..92c85ec0 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -98,6 +98,11 @@ option('with_pam', value: false, description: 'with PAM-support for mod_auth [default: off]', ) +option('with_pcre2', + type: 'boolean', + value: false, + description: 'with regex support [default: off]', +) option('with_pcre', type: 'boolean', value: true, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1960a6fa..d58ba8eb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,6 +26,7 @@ option(WITH_NSS "with NSS-crypto-support [default: off]") option(WITH_OPENSSL "with openssl-support [default: off]") option(WITH_WOLFSSL "with wolfSSL-support [default: off]") option(WITH_NETTLE "with Nettle-support [default: off]") +option(WITH_PCRE2 "with regex support [default: 
off]") option(WITH_PCRE "with regex support [default: on]" ON) option(WITH_WEBDAV_PROPS "with property-support for mod_webdav [default: off]") option(WITH_WEBDAV_LOCKS "locks in webdav [default: off]") @@ -255,7 +256,11 @@ macro(XCONFIG _package _include_DIR _link_DIR _link_FLAGS _cflags) set(XCONFIG_EXECUTABLE "${${_package}CONFIG_EXECUTABLE}") message(STATUS "found ${_package}: ${XCONFIG_EXECUTABLE}") - exec_program(${XCONFIG_EXECUTABLE} ARGS --libs OUTPUT_VARIABLE __link_FLAGS) + if("${_package}" STREQUAL "pcre2-config") + exec_program(${XCONFIG_EXECUTABLE} ARGS --libs8 OUTPUT_VARIABLE __link_FLAGS) + else() + exec_program(${XCONFIG_EXECUTABLE} ARGS --libs OUTPUT_VARIABLE __link_FLAGS) + endif() string(REPLACE "\n" "" ${_link_FLAGS} ${__link_FLAGS}) exec_program(${XCONFIG_EXECUTABLE} ARGS --cflags OUTPUT_VARIABLE __cflags) string(REPLACE "\n" "" ${_cflags} ${__cflags}) @@ -467,7 +472,55 @@ if(WITH_GNUTLS) endif() endif() -if(WITH_PCRE) +if(WITH_PCRE2) + ## if we have pcre2-config, use it + xconfig(pcre2-config PCRE_INCDIR PCRE_LIBDIR PCRE_LDFLAGS PCRE_CFLAGS) + if(PCRE_LDFLAGS OR PCRE_CFLAGS) + message(STATUS "found pcre2 at: LDFLAGS: ${PCRE_LDFLAGS} CFLAGS: ${PCRE_CFLAGS}") + + if(NOT PCRE_CFLAGS STREQUAL "\n") + ## if it is empty we'll get newline returned + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${PCRE_CFLAGS}") + endif() + + set(HAVE_PCRE2_H 1) + set(HAVE_PCRE 1) + else() + if(NOT WIN32) + check_include_files(pcre2.h HAVE_PCRE2_H) + check_library_exists(pcre2-8 pcre2_match_8 "" HAVE_PCRE) + set(PCRE_LDFLAGS -lpcre2-8) + else() + find_path(PCRE_INCLUDE_DIR pcre2.h + /usr/local/include + /usr/include + ) + + set(PCRE_NAMES pcre2-8) + find_library(PCRE_LIBRARY + NAMES ${PCRE_NAMES} + PATHS /usr/lib /usr/local/lib + ) + + if(PCRE_INCLUDE_DIR AND PCRE_LIBRARY) + set(CMAKE_REQUIRED_INCLUDES ${PCRE_INCLUDE_DIR}) + set(CMAKE_REQUIRED_LIBRARIES ${PCRE_LIBRARY}) + check_include_files(pcre2.h HAVE_PCRE2_H) + check_library_exists(pcre2-8 pcre2_match_8 "" HAVE_PCRE) + 
set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_LIBRARIES) + include_directories(${PCRE_INCLUDE_DIR}) + endif() + endif() + endif() + + if(NOT HAVE_PCRE2_H) + message(FATAL_ERROR "pcre2.h couldn't be found") + endif() + if(NOT HAVE_PCRE) + message(FATAL_ERROR "libpcre2-8 couldn't be found") + endif() +elseif(WITH_PCRE) ## if we have pcre-config, use it xconfig(pcre-config PCRE_INCDIR PCRE_LIBDIR PCRE_LDFLAGS PCRE_CFLAGS) if(PCRE_LDFLAGS OR PCRE_CFLAGS) @@ -479,11 +532,11 @@ if(WITH_PCRE) endif() set(HAVE_PCRE_H 1) - set(HAVE_LIBPCRE 1) + set(HAVE_PCRE 1) else() if(NOT WIN32) check_include_files(pcre.h HAVE_PCRE_H) - check_library_exists(pcre pcre_exec "" HAVE_LIBPCRE) + check_library_exists(pcre pcre_exec "" HAVE_PCRE) set(PCRE_LDFLAGS -lpcre) else() find_path(PCRE_INCLUDE_DIR pcre.h @@ -501,7 +554,7 @@ if(WITH_PCRE) set(CMAKE_REQUIRED_INCLUDES ${PCRE_INCLUDE_DIR}) set(CMAKE_REQUIRED_LIBRARIES ${PCRE_LIBRARY}) check_include_files(pcre.h HAVE_PCRE_H) - check_library_exists(pcre pcre_exec "" HAVE_LIBPCRE) + check_library_exists(pcre pcre_exec "" HAVE_PCRE) set(CMAKE_REQUIRED_INCLUDES) set(CMAKE_REQUIRED_LIBRARIES) include_directories(${PCRE_INCLUDE_DIR}) @@ -512,12 +565,12 @@ if(WITH_PCRE) if(NOT HAVE_PCRE_H) message(FATAL_ERROR "pcre.h couldn't be found") endif() - if(NOT HAVE_LIBPCRE) + if(NOT HAVE_PCRE) message(FATAL_ERROR "libpcre couldn't be found") endif() else() unset(HAVE_PCRE_H) - unset(HAVE_LIBPCRE) + unset(HAVE_PCRE) endif() if(WITH_SASL) @@ -895,7 +948,7 @@ add_executable(test_common ) add_test(NAME test_common COMMAND test_common) -if(HAVE_PCRE_H) +if(HAVE_PCRE) target_link_libraries(lighttpd ${PCRE_LDFLAGS}) add_target_properties(lighttpd COMPILE_FLAGS ${PCRE_CFLAGS}) target_link_libraries(test_common ${PCRE_LDFLAGS}) @@ -906,7 +959,7 @@ if(HAVE_PCRE_H) add_target_properties(test_mod COMPILE_FLAGS ${PCRE_CFLAGS}) endif() -if(WITH_PCRE AND (WITH_MEMCACHED OR WITH_GDBM)) +if(HAVE_PCRE AND (WITH_MEMCACHED OR WITH_GDBM)) 
add_and_install_library(mod_trigger_b4_dl mod_trigger_b4_dl.c) endif() diff --git a/src/base.h b/src/base.h index d0cc761a..a8e0d3e6 100644 --- a/src/base.h +++ b/src/base.h @@ -194,6 +194,9 @@ struct server { int stdin_fd; char **argv; + #ifdef HAVE_PCRE2_H + void *match_data; /*(shared and reused)*/ + #endif }; diff --git a/src/config.h.cmake b/src/config.h.cmake index d1d78059..fbcc385a 100644 --- a/src/config.h.cmake +++ b/src/config.h.cmake @@ -73,8 +73,9 @@ #cmakedefine HAVE_LIBXML /* PCRE */ +#cmakedefine HAVE_PCRE #cmakedefine HAVE_PCRE_H -#cmakedefine HAVE_LIBPCRE +#cmakedefine HAVE_PCRE2_H #cmakedefine HAVE_MALLOC_H #cmakedefine HAVE_POLL_H @@ -82,7 +83,6 @@ /* sqlite3 */ #cmakedefine HAVE_SQLITE3_H -#cmakedefine HAVE_LIBPCRE #cmakedefine HAVE_STDDEF_H #cmakedefine HAVE_STDINT_H diff --git a/src/configfile-glue.c b/src/configfile-glue.c index 9e2de5bf..f4fbea41 100644 --- a/src/configfile-glue.c +++ b/src/configfile-glue.c @@ -636,12 +636,50 @@ void config_cond_cache_reset(request_st * const r) { memset(r->cond_cache, 0, used*sizeof(cond_cache_t)); } -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE2_H +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#elif defined(HAVE_PCRE_H) #include #endif static int config_pcre_match(request_st * const r, const data_config * const dc, const buffer * const b) { -#ifdef HAVE_PCRE_H + + #ifdef HAVE_PCRE2_H + + if (__builtin_expect( (0 == dc->capture_idx), 1)) + return pcre2_match(dc->code, (PCRE2_SPTR)BUF_PTR_LEN(b), + 0, 0, dc->match_data, NULL); + + cond_match_t * const cond_match = + r->cond_match[dc->capture_idx] = r->cond_match_data + dc->capture_idx; + pcre2_match_data *match_data = cond_match->match_data; + if (__builtin_expect( (NULL == match_data), 0)) { + /*(allocate on demand)*/ + #if 0 /*(if we did not want to share dc->match_data across requests)*/ + /* index 0 is reused for all matches for which captures not used by + * other directives within the condition, so allocate for up to 9 + * captures, plus 1 for %0 for full 
match. Number of captures is + * checked at startup to be <= 9 in data_config_pcre_compile() + * (future: could save a few bytes if max captures were calculated + * at startup in config_finalize()) */ + match_data = cond_match->match_data = (0 == dc->capture_idx) + ? pcre2_match_data_create(10, NULL) + : pcre2_match_data_create_from_pattern(dc->code, NULL); + #else + match_data = cond_match->match_data = + pcre2_match_data_create_from_pattern(dc->code, NULL); + #endif + force_assert(match_data); + cond_match->matches = pcre2_get_ovector_pointer(match_data); + } + cond_match->comp_value = b; /*holds pointer to b (!) for pattern subst*/ + cond_match->captures = + pcre2_match(dc->code, (PCRE2_SPTR)BUF_PTR_LEN(b), 0, 0, match_data, NULL); + return cond_match->captures; + + #elif defined(HAVE_PCRE_H) + if (__builtin_expect( (0 == dc->capture_idx), 1)) { int matches[3 * 10]; return pcre_exec(dc->regex, dc->regex_study, BUF_PTR_LEN(b), 0, 0, @@ -658,10 +696,13 @@ static int config_pcre_match(request_st * const r, const data_config * const dc, pcre_exec(dc->regex, dc->regex_study, BUF_PTR_LEN(b), 0, 0, cond_match->matches, elementsof(cond_match->matches)); return cond_match->captures; -#else + + #else + UNUSED(r); UNUSED(dc); UNUSED(b); return 0; -#endif + + #endif } diff --git a/src/configfile.c b/src/configfile.c index 5055a9e6..fbc19fb2 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -36,6 +36,11 @@ # include #endif +#ifdef HAVE_PCRE2_H +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#endif + #ifndef PATH_MAX #define PATH_MAX 4096 #endif @@ -1231,6 +1236,43 @@ int config_finalize(server *srv, const buffer *default_server_tag) { /* adjust cond_match_data list size if regex config conditions present */ if (srv->config_captures) ++srv->config_captures; + #ifdef HAVE_PCRE2_H + for (uint32_t i = 1; i < srv->config_context->used; ++i) { + data_config * const dc = + (data_config *)srv->config_context->data[i]; + if ((dc->cond == CONFIG_COND_MATCH || dc->cond == 
CONFIG_COND_NOMATCH) + && 0 == dc->capture_idx) { + if (__builtin_expect( (NULL == srv->match_data), 0)) { + #if 0 + /* calculate max output vector size to save a few bytes; + * currently using hard-coded ovec_max = 10 below + * (increase in code size is probably more than bytes saved) */ + uint32_t ovec_max = 0; + for (uint32_t j = i; j < srv->config_context->used; ++j) { + const data_config * const dc = + (data_config *)srv->config_context->data[j]; + if ((dc->cond == CONFIG_COND_MATCH + || dc->cond == CONFIG_COND_NOMATCH) + && 0 == dc->capture_idx) { + uint32_t v; + if (0==pcre2_pattern_info(dc->code, + PCRE2_INFO_CAPTURECOUNT,&v)) { + if (ovec_max < v) + ovec_max = v; + } + } + } + #else + uint32_t ovec_max = 10; + #endif + srv->match_data = pcre2_match_data_create(ovec_max, NULL); + force_assert(srv->match_data); + } + dc->match_data = srv->match_data; + } + } + #endif + return 1; } @@ -1416,6 +1458,9 @@ void config_free(server *srv) { array_free(srv->srvconf.modules); buffer_free(srv->srvconf.modules_dir); array_free(srv->srvconf.upload_tempdirs); + #ifdef HAVE_PCRE2_H + if (NULL != srv->match_data) pcre2_match_data_free(srv->match_data); + #endif } void config_init(server *srv) { diff --git a/src/configfile.h b/src/configfile.h index ae75e713..a181ee72 100644 --- a/src/configfile.h +++ b/src/configfile.h @@ -13,7 +13,9 @@ * for compare: comp cond string/regex */ -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE2_H +struct pcre2_real_match_data_8; /* declaration */ +#elif defined(HAVE_PCRE_H) struct pcre_extra; /* declaration */ #endif @@ -33,10 +35,13 @@ struct data_config { data_config *next; buffer string; -#ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE2_H + void *code; + struct pcre2_real_match_data_8 *match_data; + #elif defined(HAVE_PCRE_H) void *regex; struct pcre_extra *regex_study; -#endif + #endif int capture_idx; int ext; buffer comp_tag; diff --git a/src/data_config.c b/src/data_config.c index d983178c..56c3a416 100644 --- a/src/data_config.c +++ b/src/data_config.c 
@@ -6,7 +6,10 @@ #include #include -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE2_H +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#elif defined(HAVE_PCRE_H) #include #ifndef PCRE_STUDY_JIT_COMPILE #define PCRE_STUDY_JIT_COMPILE 0 @@ -40,10 +43,15 @@ static void data_config_free(data_unset *d) { vector_config_weak_clear(&ds->children); free(ds->string.ptr); -#ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE2_H + if (ds->code) pcre2_code_free(ds->code); + #if 0 /*(see config_finalize())*/ + if (ds->match_data) pcre2_match_data_free(ds->match_data); + #endif + #elif defined(HAVE_PCRE_H) if (ds->regex) pcre_free(ds->regex); if (ds->regex_study) pcre_free_study(ds->regex_study); -#endif + #endif free(d); } @@ -72,7 +80,58 @@ data_config *data_config_init(void) { #include "log.h" int data_config_pcre_compile(data_config * const dc, const int pcre_jit, log_error_st * const errh) { -#ifdef HAVE_PCRE_H + + #ifdef HAVE_PCRE2_H + + int errcode; + PCRE2_SIZE erroff; + PCRE2_UCHAR errbuf[1024]; + + dc->code = pcre2_compile((PCRE2_SPTR)BUF_PTR_LEN(&dc->string), + PCRE2_UTF, &errcode, &erroff, NULL); + if (NULL == dc->code) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_compile: %s at offset %zu, regex: %s", + (char *)errbuf, erroff, dc->string.ptr); + return 0; + } + + if (pcre_jit) { + errcode = pcre2_jit_compile(dc->code, PCRE2_JIT_COMPLETE); + if (0 != errcode) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_jit_compile: %s, regex: %s", + (char *)errbuf, dc->string.ptr); + } + /*return 0;*/ + } + + uint32_t captures; + errcode = pcre2_pattern_info(dc->code, PCRE2_INFO_CAPTURECOUNT, &captures); + if (0 != errcode) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_pattern_info: %s, regex: %s", (char *)errbuf, dc->string.ptr); + return 0; + } + else if (captures > 9) { + log_error(errh, __FILE__, __LINE__, + "Too many 
captures in regex, use (?:...) instead of (...): %s", + dc->string.ptr); + return 0; + } + + #if 0 /*(see config_finalize())*/ + dc->match_data = pcre2_match_data_create_from_pattern(dc->code, NULL); + force_assert(dc->match_data); + #endif + + return 1; + + #elif defined(HAVE_PCRE_H) + const char *errptr; int erroff, captures; @@ -108,12 +167,15 @@ int data_config_pcre_compile(data_config * const dc, const int pcre_jit, log_err return 0; } return 1; -#else + + #else + UNUSED(pcre_jit); log_error(errh, __FILE__, __LINE__, "can't handle '%s' as you compiled without pcre support. \n" "(perhaps just a missing pcre-devel package ?) \n", dc->comp_key); return 0; -#endif + + #endif } diff --git a/src/h2.c b/src/h2.c index fd4b8046..07b94071 100644 --- a/src/h2.c +++ b/src/h2.c @@ -2570,7 +2570,7 @@ h2_init_stream (request_st * const h2r, connection * const con) const uint32_t used = srv->config_context->used; r->conditional_is_valid = h2r->conditional_is_valid; memcpy(r->cond_cache, h2r->cond_cache, used * sizeof(cond_cache_t)); - #ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE if (srv->config_captures > 1) memcpy(r->cond_match, h2r->cond_match, srv->config_captures * sizeof(cond_match_t)); diff --git a/src/keyvalue.c b/src/keyvalue.c index 4c9cf9fa..d44f812c 100644 --- a/src/keyvalue.c +++ b/src/keyvalue.c @@ -15,7 +15,10 @@ #include #include -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE2_H +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#elif defined(HAVE_PCRE_H) #include #ifndef PCRE_STUDY_JIT_COMPILE #define PCRE_STUDY_JIT_COMPILE 0 @@ -23,11 +26,18 @@ #endif #endif +#ifdef HAVE_PCRE2_H +static struct pcre2_real_match_data_8 *keyvalue_match_data; +#endif + typedef struct pcre_keyvalue { -#ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE2_H + pcre2_code *code; + struct pcre2_real_match_data_8 *match_data; + #elif defined(HAVE_PCRE_H) pcre *key; pcre_extra *key_extra; -#endif + #endif buffer value; } pcre_keyvalue; @@ -41,9 +51,9 @@ pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void) { } int 
pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, const buffer *key, const buffer *value, const int pcre_jit) { -#ifdef HAVE_PCRE_H - const char *errptr; - int erroff; + + #ifdef HAVE_PCRE + pcre_keyvalue *kv; if (0 == (kvb->used & 3)) { /*(allocate in groups of 4)*/ @@ -52,12 +62,70 @@ int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, c } kv = kvb->kv + kvb->used++; - kv->key_extra = NULL; /* copy persistent config data, and elide free() in free_data below */ memcpy(&kv->value, value, sizeof(buffer)); /*buffer_copy_buffer(&kv->value, value);*/ + #ifdef HAVE_PCRE2_H + + int errcode; + PCRE2_SIZE erroff; + PCRE2_UCHAR errbuf[1024]; + + kv->code = pcre2_compile((PCRE2_SPTR)BUF_PTR_LEN(key), + PCRE2_UTF, &errcode, &erroff, NULL); + if (NULL == kv->code) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_compile: %s at offset %zu, regex: %s", + (char *)errbuf, erroff, key->ptr); + return 0; + } + + if (pcre_jit) { + errcode = pcre2_jit_compile(kv->code, PCRE2_JIT_COMPLETE); + if (0 != errcode) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_jit_compile: %s, regex: %s", (char *)errbuf, key->ptr); + /*return 0;*/ + } + } + + uint32_t captures; + errcode = pcre2_pattern_info(kv->code, PCRE2_INFO_CAPTURECOUNT, &captures); + if (0 != errcode) { + pcre2_get_error_message(errcode, errbuf, sizeof(errbuf)); + log_error(errh, __FILE__, __LINE__, + "pcre2_pattern_info: %s, regex: %s", (char *)errbuf, key->ptr); + return 0; + } + else if (captures > 19) { + log_error(errh, __FILE__, __LINE__, + "Too many captures in regex, " + "use (?:...) 
instead of (...): %s", key->ptr); + return 0; + } + + #if 1 /*(share single keyvalue_match_data among all keyvalue regexes)*/ + if (NULL == keyvalue_match_data) { + keyvalue_match_data = pcre2_match_data_create(20, NULL); + force_assert(keyvalue_match_data); + } + kv->match_data = keyvalue_match_data; + #else + kv->match_data = pcre2_match_data_create_from_pattern(kv->code, NULL); + force_assert(kv->match_data); + #endif + + #elif defined(HAVE_PCRE_H) + + const char *errptr; + int erroff; + + kv->key_extra = NULL; + if (NULL == (kv->key = pcre_compile(key->ptr, 0, &errptr, &erroff, NULL))) { @@ -74,7 +142,11 @@ int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, c key->ptr, errptr); return 0; } -#else + + #endif + + #else /* !HAVE_PCRE */ + static int logged_message = 0; if (logged_message) return 1; logged_message = 1; @@ -84,29 +156,47 @@ int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, c UNUSED(key); UNUSED(value); UNUSED(pcre_jit); -#endif + + #endif /* !HAVE_PCRE */ return 1; } void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) { -#ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE pcre_keyvalue *kv = kvb->kv; for (int i = 0, used = (int)kvb->used; i < used; ++i, ++kv) { + #ifdef HAVE_PCRE2_H + if (kv->code) pcre2_code_free(kv->code); + #if 1 + if (keyvalue_match_data) { + pcre2_match_data_free(keyvalue_match_data); + keyvalue_match_data = NULL; + } + #else + if (kv->match_data) pcre2_match_data_free(kv->match_data); + #endif + #elif defined(HAVE_PCRE_H) if (kv->key) pcre_free(kv->key); if (kv->key_extra) pcre_free_study(kv->key_extra); /*free (kv->value.ptr);*//*(see pcre_keyvalue_buffer_append)*/ + #endif } if (kvb->kv) free(kvb->kv); -#endif + #endif free(kvb); } -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE + static void pcre_keyvalue_buffer_append_match(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) { if (num < (unsigned int)ctx->n) { /* n is always > 0 */ + #ifdef HAVE_PCRE2_H + const 
PCRE2_SIZE *ovec = (PCRE2_SIZE *)ctx->ovec; + #elif defined(HAVE_PCRE_H) const int *ovec = (int *)ctx->ovec; + #endif const size_t off = (size_t)ovec[(num <<= 1)]; /*(num *= 2)*/ const size_t len = (size_t)ovec[num+1] - off; burl_append(b, ctx->subject + off, len, flags); @@ -117,7 +207,11 @@ static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, const pcre_keyvalue_ const struct cond_match_t * const cache = ctx->cache; if (!cache) return; /* no enclosing match context */ if (num < (unsigned int)cache->captures) { + #ifdef HAVE_PCRE2_H + const PCRE2_SIZE *ovec = (PCRE2_SIZE *)cache->matches; + #elif defined(HAVE_PCRE_H) const int *ovec = (int *)cache->matches; + #endif const size_t off = (size_t)ovec[(num <<= 1)]; /*(num *= 2)*/ const size_t len = (size_t)ovec[num+1] - off; burl_append(b, cache->comp_value->ptr + off, len, flags); @@ -305,15 +399,23 @@ static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, const buffer *input, buffer *result) { const pcre_keyvalue *kv = kvb->kv; for (int i = 0, used = (int)kvb->used; i < used; ++i, ++kv) { + #ifdef HAVE_PCRE2_H + int n = pcre2_match(kv->code, (PCRE2_SPTR)BUF_PTR_LEN(input), + 0, 0, kv->match_data, NULL); + #else #define N 20 int ovec[N * 3]; #undef N int n = pcre_exec(kv->key, kv->key_extra, BUF_PTR_LEN(input), 0, 0, ovec, sizeof(ovec)/sizeof(int)); + #endif if (n < 0) { - if (n != PCRE_ERROR_NOMATCH) { + #ifdef HAVE_PCRE2_H + if (n != PCRE2_ERROR_NOMATCH) + #else + if (n != PCRE_ERROR_NOMATCH) + #endif return HANDLER_ERROR; - } } else if (buffer_is_blank(&kv->value)) { /* short-circuit if blank replacement pattern @@ -325,7 +427,11 @@ handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_key ctx->m = i; ctx->n = n; ctx->subject = input->ptr; + #ifdef HAVE_PCRE2_H + ctx->ovec = pcre2_get_ovector_pointer(kv->match_data); + #else ctx->ovec = ovec; + #endif 
pcre_keyvalue_buffer_subst(result, &kv->value, ctx); return HANDLER_FINISHED; } @@ -333,7 +439,9 @@ handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_key return HANDLER_GO_ON; } -#else + +#else /* !HAVE_PCRE */ + handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, const buffer *input, buffer *result) { UNUSED(kvb); UNUSED(ctx); @@ -341,7 +449,8 @@ handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_key UNUSED(result); return HANDLER_GO_ON; } -#endif + +#endif /* !HAVE_PCRE */ /* modified from burl_normalize_basic() to handle %% extra encoding layer */ diff --git a/src/meson.build b/src/meson.build index 8e099f5f..dc9b75fd 100644 --- a/src/meson.build +++ b/src/meson.build @@ -524,13 +524,20 @@ if get_option('with_nss') endif libpcre = [] -if get_option('with_pcre') +if get_option('with_pcre2') + # manual search: + # header: pcre2.h + # function: pcre_match (-lpcre2-8) + libpcre = [ dependency('libpcre2-8') ] + conf_data.set('HAVE_PCRE2_H', true) + conf_data.set('HAVE_PCRE', true) +elif get_option('with_pcre') # manual search: # header: pcre.h # function: pcre_exec (-lpcre) libpcre = [ dependency('libpcre') ] conf_data.set('HAVE_PCRE_H', true) - conf_data.set('HAVE_LIBPCRE', true) + conf_data.set('HAVE_PCRE', true) endif libpq = [] diff --git a/src/mod_status.c b/src/mod_status.c index 9998ef2c..8c335721 100644 --- a/src/mod_status.c +++ b/src/mod_status.c @@ -834,7 +834,7 @@ static handler_t mod_status_handle_server_config(request_st * const r) { " \n")); mod_status_header_append(b, CONST_STR_LEN("Server-Features")); -#ifdef HAVE_PCRE_H +#ifdef HAVE_PCRE mod_status_row_append(b, CONST_STR_LEN("RegEx Conditionals"), CONST_STR_LEN("enabled")); #else mod_status_row_append(b, CONST_STR_LEN("RegEx Conditionals"), CONST_STR_LEN("disabled - pcre missing")); diff --git a/src/plugin_config.h b/src/plugin_config.h index 93ebdbd2..f61646c9 100644 --- a/src/plugin_config.h +++ 
b/src/plugin_config.h @@ -165,10 +165,20 @@ typedef struct cond_cache_t { int8_t local_result; /*(cond_result_t)*/ } cond_cache_t; /* 2 bytes (2^1) */ +#ifdef HAVE_PCRE2_H +struct pcre2_real_match_data_8; /* declaration */ +#endif + typedef struct cond_match_t { const buffer *comp_value; /* just a pointer */ + #ifdef HAVE_PCRE2_H + struct pcre2_real_match_data_8 *match_data; + int captures; + void *matches; /* (PCRE2_SIZE *) */ + #elif defined(HAVE_PCRE_H) int captures; int matches[3 * 10]; + #endif } cond_match_t; int config_check_cond(request_st *r, int context_ndx); diff --git a/src/reqpool.c b/src/reqpool.c index dab31702..9ffc1ddd 100644 --- a/src/reqpool.c +++ b/src/reqpool.c @@ -18,6 +18,11 @@ #include "request.h" #include "response.h" +#ifdef HAVE_PCRE2_H +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#endif + static const request_config *request_config_defaults; @@ -60,7 +65,7 @@ request_init_data (request_st * const r, connection * const con, server * const r->cond_cache = calloc(srv->config_context->used, sizeof(cond_cache_t)); force_assert(NULL != r->cond_cache); - #ifdef HAVE_PCRE_H + #ifdef HAVE_PCRE if (srv->config_captures) {/*(save 128b per con if no regex conditions)*/ r->cond_match = calloc(srv->config_captures, sizeof(cond_match_t *)); force_assert(NULL != r->cond_match); @@ -225,9 +230,17 @@ request_free_data (request_st * const r) free(r->plugin_ctx); free(r->cond_cache); - #ifdef HAVE_PCRE_H - free(r->cond_match); - free(r->cond_match_data); + #ifdef HAVE_PCRE + if (r->cond_match_data) { + #ifdef HAVE_PCRE2_H + for (int i = 0, used = r->con->srv->config_captures; i < used; ++i) { + if (r->cond_match_data[i].match_data) + pcre2_match_data_free(r->cond_match_data[i].match_data); + } + #endif + free(r->cond_match_data); + free(r->cond_match); + } #endif /* note: r is not zeroed here and r is not freed here */ diff --git a/src/server.c b/src/server.c index e0fffce9..c3a4f27d 100644 --- a/src/server.c +++ b/src/server.c @@ -662,7 +662,7 @@ static 
void show_features (void) { #else "\t- Nettle support\n" #endif -#ifdef HAVE_LIBPCRE +#ifdef HAVE_PCRE "\t+ PCRE support\n" #else "\t- PCRE support\n" diff --git a/src/t/test_keyvalue.c b/src/t/test_keyvalue.c index b5bcd137..57c8bcd3 100644 --- a/src/t/test_keyvalue.c +++ b/src/t/test_keyvalue.c @@ -65,6 +65,10 @@ static void test_keyvalue_pcre_keyvalue_buffer_process (void) { memset(&cache, 0, sizeof(cache)); cache.comp_value = authority; cache.captures = 2; + #ifdef HAVE_PCRE2_H + PCRE2_SIZE matches[4]; + cache.matches = matches; + #endif cache.matches[0] = 0; cache.matches[1] = 15; cache.matches[2] = 0;