From 422c0980fd8ac195a4f1f7f3c60116020a2c73f8 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 13:20:24 +0200 Subject: [PATCH 01/34] Add pcre2 support (WIP) Patch substandard_pcre2-2022-06-15.patch (b740683a539f7c71dc4e918d94e7a01c2e702574e2d9b520bec7a23078ba7f58) Submitted by: Gagan Sidhu --- acconfig.h | 6 ++++ cgi.c | 6 ++-- configure.in | 53 ++++++++++++++++++++++++++-- pcrs.c | 98 +++++++++++++++++++++++++++++++++++++++------------- pcrs.h | 15 ++++++-- project.h | 42 +++++++++++++++------- 6 files changed, 176 insertions(+), 44 deletions(-) diff --git a/acconfig.h b/acconfig.h index dbf443bb..9fbe5a72 100644 --- a/acconfig.h +++ b/acconfig.h @@ -225,11 +225,17 @@ /* Define if pcre.h must be included as */ #undef PCRE_H_IN_SUBDIR +#undef PCRE2_H_IN_SUBDIR + +#undef HAVE_PCRE2 +#undef HAVE_PCRE2POSIX /* Define if pcreposix.h must be included as */ #undef PCREPOSIX_H_IN_SUBDIR +#undef PCRE2POSIX_H_IN_SUBDIR + @BOTTOM@ /* diff --git a/cgi.c b/cgi.c index d54f3f08..92db628d 100644 --- a/cgi.c +++ b/cgi.c @@ -2023,7 +2023,7 @@ jb_err template_fill(char **template_ptr, const struct map *exports) char buf[BUFFER_SIZE]; char *tmp_out_buffer; char *file_buffer; - size_t size; + size_t bufsize, size; int error; const char *flags; @@ -2032,7 +2032,7 @@ jb_err template_fill(char **template_ptr, const struct map *exports) assert(exports); file_buffer = *template_ptr; - size = strlen(file_buffer) + 1; + bufsize = strlen(file_buffer) + 1; /* * Assemble pcrs joblist from exports map @@ -2083,7 +2083,7 @@ jb_err template_fill(char **template_ptr, const struct map *exports) } else { - error = pcrs_execute(job, file_buffer, size, &tmp_out_buffer, &size); + error = pcrs_execute(job, file_buffer, bufsize, &tmp_out_buffer, &size); pcrs_free_job(job); if (NULL == tmp_out_buffer) diff --git a/configure.in b/configure.in index 471463a9..d7551eda 100644 --- a/configure.in +++ b/configure.in @@ -875,9 +875,31 @@ dnl 
================================================================= dnl Note: Some systems may have the library but not the system header dnl file, so we must check for both. dnl Also check for correct version +AC_CHECK_LIB(pcre2-8, pcre2_compile_8, [ + AC_CHECK_HEADER(pcre2.h, [ + AC_EGREP_HEADER(pcre2_pattern_info, pcre2.h,[have_pcre2=yes; AC_DEFINE(HAVE_PCRE2)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no]) + ], [ + AC_CHECK_HEADER(pcre2/pcre2.h, [ + AC_EGREP_HEADER(pcre2_pattern_info, pcre2/pcre2.h, [have_pcre2=yes; AC_DEFINE(PCRE2_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre2 old version installed]]); have_pcre2=no]) + ], [have_pcre2=no]) + ], [#define PCRE2_CODE_UNIT_WIDTH 8]) +], [have_pcre2=no]) + +AC_CHECK_LIB(pcre2-posix, regcomp, [ + AC_CHECK_HEADER(pcre2posix.h, [ + AC_EGREP_HEADER(pcre2_regerror, pcre2posix.h, [have_pcre2posix=yes],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no]) + ], [ + AC_CHECK_HEADER(pcre/pcre2posix.h, [ + AC_EGREP_HEADER(pcre2_regerror, pcre2/pcre2posix.h, [have_pcre2posix=yes; AC_DEFINE(PCRE2POSIX_H_IN_SUBDIR)],[AC_MSG_WARN([[pcre2posix old version installed]]); have_pcre2posix=no]) + ], [have_pcre2posix=no]) + ]) +], [have_pcre2posix=no], -lpcre2-8) + +if test $have_pcre2 = "no"; then + AC_CHECK_LIB(pcre, pcre_compile, [ AC_CHECK_HEADER(pcre.h, [ - AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) + AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes],[AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) ], [ AC_CHECK_HEADER(pcre/pcre.h, [ AC_EGREP_HEADER(pcre_fullinfo, pcre/pcre.h, [have_pcre=yes]; [AC_DEFINE(PCRE_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) @@ -895,6 +917,7 @@ AC_CHECK_LIB(pcreposix, regcomp, [ ]) ], [have_pcreposix=no], -lpcre) +fi dnl ================================================================ dnl libpcrs is temporarily disabled. 
dnl @@ -1101,6 +1124,31 @@ fi # we don't need pcreposix, then link pcre dynamically; else # build it and link statically # + +#check for libpcre2 first. then regular pcre + +if test $have_pcre2 = "yes"; then + echo "using libpcre2" + STATIC_PCRE_ONLY=# + LIBS="$LIBS -lpcre2-8 -lpcre2-posix" + if test "$use_static_pcre" = "yes"; then + pcre_dyn=no + AC_DEFINE(PCRE_STATIC,1,[Define to statically link to pcre library on Windows.]) +# see /usr/i686-w64-mingw32/sys-root/mingw/include/pcre.h line 54 +# #if defined(_WIN32) && !defined(PCRE_STATIC) +# # ifndef PCRE_EXP_DECL +# # define PCRE_EXP_DECL extern __declspec(dllimport) +# # endif +# If you want to statically link a program against a PCRE library in the form of +# a non-dll .a file, you must define PCRE_STATIC before including pcre.h or +# pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will +# be declared __declspec(dllimport), with unwanted results. + else + pcre_dyn=yes + AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.]) + fi +else + if test $have_pcre = "yes"; then echo "using libpcre" STATIC_PCRE_ONLY=# @@ -1122,7 +1170,8 @@ if test $have_pcre = "yes"; then AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.]) fi else - AC_MSG_ERROR(pcre library not detected.) + AC_MSG_ERROR(pcre2 or pcre library not detected.) 
+fi fi AC_DEFINE(FEATURE_CONNECTION_KEEP_ALIVE) diff --git a/pcrs.c b/pcrs.c index 007f7cc1..53a95c74 100644 --- a/pcrs.c +++ b/pcrs.c @@ -82,26 +82,26 @@ const char *pcrs_strerror(const int error) { switch (error) { - /* Passed-through PCRE error: */ - case PCRE_ERROR_NOMEMORY: return "(pcre:) No memory"; + /* Passed-through PCRE error: */ + case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory"; /* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */ - case PCRE_ERROR_NULL: return "(pcre:) NULL code or subject or ovector"; - case PCRE_ERROR_BADOPTION: return "(pcre:) Unrecognized option bit"; - case PCRE_ERROR_BADMAGIC: return "(pcre:) Bad magic number in code"; + case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector"; + case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit"; + case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code"; +#if defined(PCRE_ERROR_UNKNOWN_NODE) case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern"; - +#endif /* Can't happen / not passed: */ - case PCRE_ERROR_NOSUBSTRING: return "(pcre:) Fire in power supply"; - case PCRE_ERROR_NOMATCH: return "(pcre:) Water in power supply"; + case PCREn(ERROR_NOSUBSTRING): return "(pcre:) Fire in power supply"; + case PCREn(ERROR_NOMATCH): return "(pcre:) Water in power supply"; #ifdef PCRE_ERROR_MATCHLIMIT /* * Only reported by PCRE versions newer than our own. 
*/ - case PCRE_ERROR_MATCHLIMIT: return "(pcre:) Match limit reached"; + case PCREn(ERROR_MATCHLIMIT): return "(pcre:) Match limit reached"; #endif /* def PCRE_ERROR_MATCHLIMIT */ - /* PCRS errors: */ case PCRS_ERR_NOMEM: return "(pcrs:) No memory"; case PCRS_ERR_CMDSYNTAX: return "(pcrs:) Syntax error while parsing command"; @@ -163,13 +163,13 @@ static int pcrs_parse_perl_options(const char *optstring, int *flags) { case 'e': break; /* ToDo ;-) */ case 'g': *flags |= PCRS_GLOBAL; break; - case 'i': rc |= PCRE_CASELESS; break; - case 'm': rc |= PCRE_MULTILINE; break; + case 'i': rc |= PCREn(CASELESS); break; + case 'm': rc |= PCREn(MULTILINE); break; case 'o': break; - case 's': rc |= PCRE_DOTALL; break; - case 'x': rc |= PCRE_EXTENDED; break; + case 's': rc |= PCREn(DOTALL); break; + case 'x': rc |= PCREn(EXTENDED); break; case 'D': *flags |= PCRS_DYNAMIC; break; - case 'U': rc |= PCRE_UNGREEDY; break; + case 'U': rc |= PCREn(UNGREEDY); break; case 'T': *flags |= PCRS_TRIVIAL; break; default: break; } @@ -472,6 +472,7 @@ pcrs_job *pcrs_free_job(pcrs_job *job) { next = job->next; if (job->pattern != NULL) free(job->pattern); +#ifndef HAVE_PCRE2 if (job->hints != NULL) { #ifdef PCRE_CONFIG_JIT @@ -480,6 +481,7 @@ pcrs_job *pcrs_free_job(pcrs_job *job) free(job->hints); #endif } +#endif if (job->substitute != NULL) { if (job->substitute->text != NULL) free(job->substitute->text); @@ -628,10 +630,10 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * pcrs_job *newjob; int flags; int capturecount; - const char *error; int pcre_study_options = 0; - *errptr = 0; + const char *error; + unsigned char errorstr[128]; /* * Handle NULL arguments @@ -661,21 +663,32 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * /* * Compile the pattern */ +#ifdef HAVE_PCRE2 + int errcode; + newjob->pattern = pcre2_compile((const unsigned char *) pattern, PCRE2_ZERO_TERMINATED, newjob->options, + &errcode, (PCRE2_SIZE*) 
errptr, NULL); + pcre2_get_error_message(errcode, errorstr, 128); +#else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); +#endif if (newjob->pattern == NULL) { pcrs_free_job(newjob); return NULL; } - -#ifdef PCRE_STUDY_JIT_COMPILE +#if defined(PCRE_STUDY_JIT_COMPILE) || defined(HAVE_PCRE2) #ifdef DISABLE_PCRE_JIT_COMPILATION #warning PCRE_STUDY_JIT_COMPILE is supported but Privoxy has been configured not to use it #else if (!(flags & PCRS_DYNAMIC)) { +#ifdef HAVE_PCRE2 + pcre_study_options = PCRE2_JIT_COMPLETE; +#else + // pcre_study no longer exists in pcre2 pcre_study_options = PCRE_STUDY_JIT_COMPILE; +#endif } #endif #endif @@ -684,20 +697,27 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * * Generate hints. This has little overhead, since the * hints will be NULL for a boring pattern anyway. */ +#ifdef HAVE_PCRE2 + if(pcre2_jit_compile(newjob->pattern, pcre_study_options) && strlen(errorstr)) +#else newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) +#endif { *errptr = PCRS_ERR_STUDY; pcrs_free_job(newjob); return NULL; } - /* * Determine the number of capturing subpatterns. * This is needed for handling $+ in the substitute. 
*/ +#ifdef HAVE_PCRE2 + if (0 > (*errptr = pcre2_pattern_info(newjob->pattern, PCRE2_INFO_CAPTURECOUNT, &capturecount))) +#else if (0 > (*errptr = pcre_fullinfo(newjob->pattern, newjob->hints, PCRE_INFO_CAPTURECOUNT, &capturecount))) +#endif { pcrs_free_job(newjob); return NULL; @@ -809,14 +829,21 @@ int pcrs_execute_list(pcrs_job *joblist, char *subject, size_t subject_length, c *********************************************************************/ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length) { - int offsets[3 * PCRS_MAX_SUBMATCHES], - offset, + + int offset, i, k, matches_found, submatches, max_matches = PCRS_MAX_MATCH_INIT; size_t newsize; +#ifdef HAVE_PCRE2 pcrs_match *matches, *dummy; + pcre2_match_data *pcre2_matches; + size_t *offsets; +#else + pcrs_match *matches, *dummy; + int offsets[3 * PCRS_MAX_SUBMATCHES]; +#endif char *result_offset; offset = i = 0; @@ -830,20 +857,29 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char return(PCRS_ERR_BADJOB); } +#ifdef HAVE_PCRE2 + if (NULL == (pcre2_matches = pcre2_match_data_create((size_t)PCRS_MAX_SUBMATCHES, NULL))) + { + return(PCRS_ERR_NOMEM); + } + offsets = pcre2_get_ovector_pointer(pcre2_matches); +#endif if (NULL == (matches = (pcrs_match *)malloc((size_t)max_matches * sizeof(pcrs_match)))) { return(PCRS_ERR_NOMEM); } memset(matches, '\0', (size_t)max_matches * sizeof(pcrs_match)); - - /* * Find the pattern and calculate the space * requirements for the result */ newsize = subject_length; +#ifdef HAVE_PCRE2 + while ((submatches = pcre2_match(job->pattern, (const unsigned char *) subject, (int)subject_length, offset, 0, pcre2_matches, NULL)) > 0) +#else while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0) +#endif { job->flags |= PCRS_SUCCESS; matches[i].submatches = submatches; @@ -875,6 +911,7 @@ int pcrs_execute(pcrs_job 
*job, const char *subject, size_t subject_length, char if (++i >= max_matches) { max_matches = (int)(max_matches * PCRS_MAX_MATCH_GROW); + if (NULL == (dummy = (pcrs_match *)realloc(matches, (size_t)max_matches * sizeof(pcrs_match)))) { free(matches); @@ -897,9 +934,16 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char offset = offsets[1]; } /* Pass pcre error through if (bad) failure */ +#ifdef HAVE_PCRE2 + if (submatches < PCRE2_ERROR_NOMATCH) +#else if (submatches < PCRE_ERROR_NOMATCH) +#endif { free(matches); +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif return submatches; } matches_found = i; @@ -912,6 +956,9 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char if ((*result = (char *)malloc(newsize + 1)) == NULL) { free(matches); +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif return PCRS_ERR_NOMEM; } else @@ -964,6 +1011,9 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char memcpy(result_offset, subject + offset, subject_length - (size_t)offset); *result_length = newsize; +#ifdef HAVE_PCRE2 + pcre2_match_data_free(pcre2_matches); +#endif free(matches); return matches_found; diff --git a/pcrs.h b/pcrs.h index abff3caa..0fde37b8 100644 --- a/pcrs.h +++ b/pcrs.h @@ -31,11 +31,18 @@ * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* *********************************************************************/ - - +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#define PCREn(x) PCRE2_ ## x +#ifndef _PCRE2_H +#include +#endif +#else +#define PCREn(x) PCRE_ ## x #ifndef _PCRE_H #include #endif +#endif /* * Constants: @@ -107,8 +114,12 @@ typedef struct { /* A PCRS job */ typedef struct PCRS_JOB { +#ifdef HAVE_PCRE2 + pcre2_code *pattern; +#else pcre *pattern; /* The compiled pcre pattern */ pcre_extra *hints; /* The pcre hints for the pattern */ +#endif int options; /* The pcre options (numeric) */ int flags; /* The pcrs and user flags (see "Flags" above) */ pcrs_substitute *substitute; /* The compiled pcrs substitute */ diff --git a/project.h b/project.h index 8ff5694c..df2e02bd 100644 --- a/project.h +++ b/project.h @@ -99,12 +99,38 @@ */ #ifdef STATIC_PCRE +#ifdef HAVE_PCRE2 +# include "pcre2.h" +# include "pcre2posix.h" +#else # include "pcre.h" +# include "pcreposix.h" +#endif #else -# ifdef PCRE_H_IN_SUBDIR -# include +# ifdef HAVE_PCRE2 +# ifdef PCRE2_H_IN_SUBDIR +# define PCRE2_CODE_UNIT_WIDTH 8 +# include +# else +# define PCRE2_CODE_UNIT_WIDTH 8 +# include +# endif +# ifdef PCRE2POSIX_H_IN_SUBDIR +# include +# else +# include +# endif # else -# include +# ifdef PCRE_H_IN_SUBDIR +# include +# else +# include +# endif +# ifdef PCREPOSIX_H_IN_SUBDIR +# include +# else +# include +# endif # endif #endif @@ -114,16 +140,6 @@ # include #endif -#ifdef STATIC_PCRE -# include "pcreposix.h" -#else -# ifdef PCRE_H_IN_SUBDIR -# include -# else -# include -# endif -#endif - #ifdef _WIN32 /* * I don't want to have to #include all this just for the declaration -- 2.40.1 From 6277f9fed97ce9d78262b8e7130320224a67c36d Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Wed, 15 Mar 2023 10:41:02 +0100 Subject: [PATCH 02/34] pcrs.h: Use different values for the PCRS_* variables to prevent clashes with pcre2 --- pcrs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pcrs.h b/pcrs.h 
index 0fde37b8..620bca0b 100644 --- a/pcrs.h +++ b/pcrs.h @@ -74,10 +74,10 @@ * only the first part was used. */ /* Flags */ -#define PCRS_GLOBAL 1 /* Job should be applied globally, as with perl's g option */ -#define PCRS_TRIVIAL 2 /* Backreferences in the substitute are ignored */ -#define PCRS_SUCCESS 4 /* Job did previously match */ -#define PCRS_DYNAMIC 8 /* Job is dynamic (used to disable JIT compilation) */ +#define PCRS_GLOBAL 0x08000000u /* Job should be applied globally, as with perl's g option */ +#define PCRS_TRIVIAL 0x10000000u /* Backreferences in the substitute are ignored */ +#define PCRS_SUCCESS 0x20000000u /* Job did previously match */ +#define PCRS_DYNAMIC 0x40000000u /* Job is dynamic (used to disable JIT compilation) */ /* -- 2.40.1 From a1f0d09661160ac3921d87e98fdb58013e0a7917 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Wed, 15 Mar 2023 11:19:41 +0100 Subject: [PATCH 03/34] Bump PCRS_* numbers to prevent clashes with pcre2 --- pcrs.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pcrs.h b/pcrs.h index 620bca0b..32277596 100644 --- a/pcrs.h +++ b/pcrs.h @@ -65,12 +65,12 @@ * PCRE 6.7 uses error codes from -1 to -21, PCRS error codes * below -100 should be safe for a while. */ -#define PCRS_ERR_NOMEM -100 /* Failed to acquire memory. */ -#define PCRS_ERR_CMDSYNTAX -101 /* Syntax of s///-command */ -#define PCRS_ERR_STUDY -102 /* pcre error while studying the pattern */ -#define PCRS_ERR_BADJOB -103 /* NULL job pointer, pattern or substitute */ -#define PCRS_WARN_BADREF -104 /* Backreference out of range */ -#define PCRS_WARN_TRUNCATION -105 /* At least one pcrs variable was too big, +#define PCRS_ERR_NOMEM -300 /* Failed to acquire memory. 
*/ +#define PCRS_ERR_CMDSYNTAX -301 /* Syntax of s///-command */ +#define PCRS_ERR_STUDY -302 /* pcre error while studying the pattern */ +#define PCRS_ERR_BADJOB -303 /* NULL job pointer, pattern or substitute */ +#define PCRS_WARN_BADREF -304 /* Backreference out of range */ +#define PCRS_WARN_TRUNCATION -305 /* At least one pcrs variable was too big, * only the first part was used. */ /* Flags */ -- 2.40.1 From f5aedefeac3091b2a404461b9b5e7dc3d3b05e66 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Thu, 15 Jun 2023 13:00:57 +0200 Subject: [PATCH 04/34] pcrs: Change flags to unsigned to prevent a compiler warning --- pcrs.c | 6 +++--- pcrs.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pcrs.c b/pcrs.c index 53a95c74..19fcc235 100644 --- a/pcrs.c +++ b/pcrs.c @@ -57,7 +57,7 @@ * Internal prototypes */ -static int pcrs_parse_perl_options(const char *optstring, int *flags); +static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags); static pcrs_substitute *pcrs_compile_replacement(const char *replacement, int trivialflag, int capturecount, int *errptr); static int is_hex_sequence(const char *sequence); @@ -149,7 +149,7 @@ const char *pcrs_strerror(const int error) * Returns : option integer suitable for pcre * *********************************************************************/ -static int pcrs_parse_perl_options(const char *optstring, int *flags) +static int pcrs_parse_perl_options(const char *optstring, unsigned int *flags) { size_t i; int rc = 0; @@ -628,7 +628,7 @@ pcrs_job *pcrs_compile_command(const char *command, int *errptr) pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char *options, int *errptr) { pcrs_job *newjob; - int flags; + unsigned int flags; int capturecount; int pcre_study_options = 0; *errptr = 0; diff --git a/pcrs.h b/pcrs.h index 32277596..3eae4d26 100644 --- a/pcrs.h +++ b/pcrs.h @@ -121,7 +121,7 @@ typedef struct PCRS_JOB { pcre_extra *hints; /* The pcre hints for the 
pattern */ #endif int options; /* The pcre options (numeric) */ - int flags; /* The pcrs and user flags (see "Flags" above) */ + unsigned int flags; /* The pcrs and user flags (see "Flags" above) */ pcrs_substitute *substitute; /* The compiled pcrs substitute */ struct PCRS_JOB *next; /* Pointer for chaining jobs to joblists */ } pcrs_job; -- 2.40.1 From 45f62a8b327276e9f5718f275181c7e293bd6f7a Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 13:53:20 +0200 Subject: [PATCH 05/34] template_fill(): Update the buffer size after running pcrs_execute() So each pcrs_execute() call acts on the whole output of the previous run. --- cgi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cgi.c b/cgi.c index 92db628d..7f812d26 100644 --- a/cgi.c +++ b/cgi.c @@ -2023,7 +2023,7 @@ jb_err template_fill(char **template_ptr, const struct map *exports) char buf[BUFFER_SIZE]; char *tmp_out_buffer; char *file_buffer; - size_t bufsize, size; + size_t buffer_size, new_size; int error; const char *flags; @@ -2032,7 +2032,7 @@ jb_err template_fill(char **template_ptr, const struct map *exports) assert(exports); file_buffer = *template_ptr; - bufsize = strlen(file_buffer) + 1; + buffer_size = strlen(file_buffer) + 1; /* * Assemble pcrs joblist from exports map @@ -2083,7 +2083,10 @@ jb_err template_fill(char **template_ptr, const struct map *exports) } else { - error = pcrs_execute(job, file_buffer, bufsize, &tmp_out_buffer, &size); + error = pcrs_execute(job, file_buffer, buffer_size, &tmp_out_buffer, + &new_size); + + buffer_size = new_size; pcrs_free_job(job); if (NULL == tmp_out_buffer) -- 2.40.1 From 0ad1d3c11b2c061d3ef03330a2dd25253e8079e6 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:26:17 +0200 Subject: [PATCH 06/34] pcrs_compile(): Add ifdefs around errorstr declaration. 
Squash --- pcrs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pcrs.c b/pcrs.c index 19fcc235..02cfed69 100644 --- a/pcrs.c +++ b/pcrs.c @@ -633,7 +633,9 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * int pcre_study_options = 0; *errptr = 0; const char *error; +#ifdef HAVE_PCRE2 unsigned char errorstr[128]; +#endif /* * Handle NULL arguments -- 2.40.1 From 2ae4e67ea278f12fa1f58b6cc256cb62bd982f09 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:26:34 +0200 Subject: [PATCH 07/34] pcrs_compile(): Fix compiler warning --- pcrs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pcrs.c b/pcrs.c index 02cfed69..a8023e16 100644 --- a/pcrs.c +++ b/pcrs.c @@ -667,8 +667,9 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * */ #ifdef HAVE_PCRE2 int errcode; - newjob->pattern = pcre2_compile((const unsigned char *) pattern, PCRE2_ZERO_TERMINATED, newjob->options, - &errcode, (PCRE2_SIZE*) errptr, NULL); + newjob->pattern = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, &errcode, + (PCRE2_SIZE*)errptr, NULL); pcre2_get_error_message(errcode, errorstr, 128); #else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); -- 2.40.1 From d77e3a4922df168af0641c1a786a83fae9051998 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:27:20 +0200 Subject: [PATCH 08/34] pcrs_compile(): Fix compiler warning --- pcrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index a8023e16..95e0949d 100644 --- a/pcrs.c +++ b/pcrs.c @@ -632,9 +632,10 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * int capturecount; int pcre_study_options = 0; *errptr = 0; - const char *error; #ifdef HAVE_PCRE2 unsigned char errorstr[128]; +#else + const char *error; #endif /* -- 2.40.1 From 2fae12b3f0036a6360533dc21e6578c5d3ab370f Mon Sep 
17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:28:14 +0200 Subject: [PATCH 09/34] pcrs_compile(): Use sizeof instead of magic number --- pcrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 95e0949d..ec73b46e 100644 --- a/pcrs.c +++ b/pcrs.c @@ -671,7 +671,7 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * newjob->pattern = pcre2_compile((const unsigned char *)pattern, PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, &errcode, (PCRE2_SIZE*)errptr, NULL); - pcre2_get_error_message(errcode, errorstr, 128); + pcre2_get_error_message(errcode, errorstr, sizeof(errorstr)); #else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); #endif -- 2.40.1 From 5fa18d9704e5c2d221a76d3407cb0cf0d59de83f Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:29:50 +0200 Subject: [PATCH 10/34] pcrs_compile(): Fix compiler warning --- pcrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index ec73b46e..6c430f35 100644 --- a/pcrs.c +++ b/pcrs.c @@ -702,7 +702,8 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * * hints will be NULL for a boring pattern anyway. 
*/ #ifdef HAVE_PCRE2 - if(pcre2_jit_compile(newjob->pattern, pcre_study_options) && strlen(errorstr)) + if (pcre2_jit_compile(newjob->pattern, pcre_study_options) && + strlen((const char *)errorstr)) #else newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) -- 2.40.1 From dde251ff96f7cadbab0e3f1fca5ee5718d4099f3 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 14:32:25 +0200 Subject: [PATCH 11/34] pcrs_compile(): Fix compiler warning --- pcrs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pcrs.c b/pcrs.c index 6c430f35..ba0ac434 100644 --- a/pcrs.c +++ b/pcrs.c @@ -630,7 +630,11 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * pcrs_job *newjob; unsigned int flags; int capturecount; +#ifdef HAVE_PCRE2 + unsigned pcre_study_options = 0; +#else int pcre_study_options = 0; +#endif *errptr = 0; #ifdef HAVE_PCRE2 unsigned char errorstr[128]; -- 2.40.1 From fb74eede8a42e83860df8da932c14d1ee9849d0b Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 15:26:35 +0200 Subject: [PATCH 12/34] pcrs_execute(): fix indentation --- pcrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index ba0ac434..b3c93ce6 100644 --- a/pcrs.c +++ b/pcrs.c @@ -839,7 +839,7 @@ int pcrs_execute_list(pcrs_job *joblist, char *subject, size_t subject_length, c int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char **result, size_t *result_length) { - int offset, + int offset, i, k, matches_found, submatches, -- 2.40.1 From 5bf511bd633b3ade3833f7483bef0cff125f00d1 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 15:26:46 +0200 Subject: [PATCH 13/34] pcrs_strerror(): fix indentation --- pcrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index b3c93ce6..b7b99c1f 100644 --- a/pcrs.c +++ b/pcrs.c @@ -82,7 +82,7 @@ const char *pcrs_strerror(const int error) { switch (error) { - /* 
Passed-through PCRE error: */ + /* Passed-through PCRE error: */ case PCREn(ERROR_NOMEMORY): return "(pcre:) No memory"; /* Shouldn't happen unless PCRE or PCRS bug, or user messed with compiled job: */ -- 2.40.1 From 0ec1cae123eb3604a10ac3b134a1913162185961 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 17:15:15 +0200 Subject: [PATCH 14/34] configure.in: Bring back space. Squash --- configure.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.in b/configure.in index d7551eda..699d6a42 100644 --- a/configure.in +++ b/configure.in @@ -899,7 +899,7 @@ if test $have_pcre2 = "no"; then AC_CHECK_LIB(pcre, pcre_compile, [ AC_CHECK_HEADER(pcre.h, [ - AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes],[AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) + AC_EGREP_HEADER(pcre_fullinfo, pcre.h, [have_pcre=yes], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) ], [ AC_CHECK_HEADER(pcre/pcre.h, [ AC_EGREP_HEADER(pcre_fullinfo, pcre/pcre.h, [have_pcre=yes]; [AC_DEFINE(PCRE_H_IN_SUBDIR)], [AC_MSG_WARN([[pcre old version installed]]); have_pcre=no]) -- 2.40.1 From f36ce5af875f00c73b03cca9f93f1e9719c842ec Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 17:17:32 +0200 Subject: [PATCH 15/34] configure.in: Improve error message. Squash --- configure.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.in b/configure.in index 699d6a42..e6e5acf3 100644 --- a/configure.in +++ b/configure.in @@ -1170,7 +1170,7 @@ if test $have_pcre = "yes"; then AC_DEFINE(FEATURE_DYNAMIC_PCRE,1,[Define to dynamically link to pcre.]) fi else - AC_MSG_ERROR(pcre2 or pcre library not detected.) + AC_MSG_ERROR(Detected neither pcre2 nor pcre library.) fi fi -- 2.40.1 From a70b1ba461a8075211b96f65b9e052cca5485a1f Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 17:25:48 +0200 Subject: [PATCH 16/34] pcrs_execute(): Fix compiler warnings. 
Squash --- pcrs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pcrs.c b/pcrs.c index b7b99c1f..a756a153 100644 --- a/pcrs.c +++ b/pcrs.c @@ -885,7 +885,8 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char newsize = subject_length; #ifdef HAVE_PCRE2 - while ((submatches = pcre2_match(job->pattern, (const unsigned char *) subject, (int)subject_length, offset, 0, pcre2_matches, NULL)) > 0) + while ((submatches = pcre2_match(job->pattern, (const unsigned char *)subject, + subject_length, (size_t)offset, 0, pcre2_matches, NULL)) > 0) #else while ((submatches = pcre_exec(job->pattern, job->hints, subject, (int)subject_length, offset, 0, offsets, 3 * PCRS_MAX_SUBMATCHES)) > 0) #endif @@ -895,7 +896,7 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char for (k = 0; k < submatches; k++) { - matches[i].submatch_offset[k] = offsets[2 * k]; + matches[i].submatch_offset[k] = (int)offsets[2 * k]; /* Note: Non-found optional submatches have length -1-(-1)==0 */ matches[i].submatch_length[k] = (size_t)(offsets[2 * k + 1] - offsets[2 * k]); @@ -912,7 +913,7 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char newsize += (size_t)offsets[0] * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES]; /* chunk after match */ - matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = offsets[1]; + matches[i].submatch_offset[PCRS_MAX_SUBMATCHES + 1] = (int)offsets[1]; matches[i].submatch_length[PCRS_MAX_SUBMATCHES + 1] = subject_length - (size_t)offsets[1] - 1; newsize += (subject_length - (size_t)offsets[1]) * (size_t)job->substitute->backref_count[PCRS_MAX_SUBMATCHES + 1]; @@ -940,7 +941,7 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char break; /* Go find the next one */ else - offset = offsets[1]; + offset = (int)offsets[1]; } /* Pass pcre error through if (bad) failure */ #ifdef HAVE_PCRE2 -- 2.40.1 From 
de529fd79ce9cc34d15d842a14bf0bb6435213b6 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sat, 17 Jun 2023 17:49:18 +0200 Subject: [PATCH 17/34] templates/show-status: Update FEATURE_DYNAMIC_PCRE description (WIP) --- templates/show-status | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/templates/show-status b/templates/show-status index 896c86d2..a6aaf8bf 100644 --- a/templates/show-status +++ b/templates/show-status @@ -298,10 +298,7 @@ FEATURE_DYNAMIC_PCRE @if-FEATURE_DYNAMIC_PCRE-then@ Yes @else-not-FEATURE_DYNAMIC_PCRE@ No @endif-FEATURE_DYNAMIC_PCRE@ - Dynamically link to the PCRE library. This is set automatically - by ./configure if you do not have libpcre installed. - Dynamically linking to an external libpcre is recommended as the one that is distributed - with Privoxy itself is outdated and lacks various features and bug-fixes you may be interested in. + Dynamically link to the PCRE(2) library (recommended). FEATURE_EXTENDED_STATISTICS -- 2.40.1 From 77cbd8f4fac520658e52fffbfae2493a96bde1a6 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 18 Jun 2023 15:33:38 +0200 Subject: [PATCH 18/34] pcrs_execute_single_command: Use new variable --- pcrs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pcrs.c b/pcrs.c index a756a153..5fb9fcc3 100644 --- a/pcrs.c +++ b/pcrs.c @@ -1161,7 +1161,7 @@ char pcrs_get_delimiter(const char *string) *********************************************************************/ char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, int *hits) { - size_t size; + size_t buffer_size, new_size; char *result = NULL; pcrs_job *job; @@ -1169,12 +1169,15 @@ char *pcrs_execute_single_command(const char *subject, const char *pcrs_command, assert(pcrs_command); *hits = 0; - size = strlen(subject); + buffer_size = strlen(subject); job = pcrs_compile_command(pcrs_command, hits); if (NULL != job) { - *hits = pcrs_execute(job, subject, size, &result, &size); + + *hits = 
pcrs_execute(job, subject, buffer_size, &result, &new_size); + buffer_size = new_size; + if (*hits < 0) { freez(result); -- 2.40.1 From 8d80dc6e706c5e7d1451c154a5a26a7d15b79279 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 18 Jun 2023 15:34:50 +0200 Subject: [PATCH 19/34] pcrs_compile(): Remove strlen() call to fix error handling. Squash --- pcrs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pcrs.c b/pcrs.c index 5fb9fcc3..09c0568c 100644 --- a/pcrs.c +++ b/pcrs.c @@ -706,8 +706,7 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * * hints will be NULL for a boring pattern anyway. */ #ifdef HAVE_PCRE2 - if (pcre2_jit_compile(newjob->pattern, pcre_study_options) && - strlen((const char *)errorstr)) + if (pcre2_jit_compile(newjob->pattern, pcre_study_options)) #else newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) -- 2.40.1 From d7ab96df1120ed2559646c2c5ff16e81194f7e9b Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 19 Jun 2023 09:40:04 +0200 Subject: [PATCH 20/34] pcrs_compile(): Only call pcre2_jit_compile() when pcre_study_options is set --- pcrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 09c0568c..1705306e 100644 --- a/pcrs.c +++ b/pcrs.c @@ -706,7 +706,8 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * * hints will be NULL for a boring pattern anyway. 
*/ #ifdef HAVE_PCRE2 - if (pcre2_jit_compile(newjob->pattern, pcre_study_options)) + if ((pcre_study_options == PCRE2_JIT_COMPLETE) && + pcre2_jit_compile(newjob->pattern, pcre_study_options)) #else newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) -- 2.40.1 From e67328ad30ab193b5adc4c518ce6657c1e8572e4 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 19 Jun 2023 09:44:45 +0200 Subject: [PATCH 21/34] pcrs_free_job(): Free using pcre2_code_free() when using pcre2 Fixes a memory leak: ==24137== Thread 1: ==24137== 80 bytes in 1 blocks are definitely lost in loss record 578 of 810 ==24137== at 0x484CBC4: malloc (in /usr/local/libexec/valgrind/vgpreload_memcheck-amd64-freebsd.so) ==24137== by 0x4BD3C0D: ??? (in /usr/local/lib/libpcre2-8.so.0.11.2) ==24137== by 0x4BBC3D9: pcre2_jit_compile_8 (in /usr/local/lib/libpcre2-8.so.0.11.2) ==24137== by 0x275615: pcrs_compile (pcrs.c:710) ==24137== by 0x2753FE: pcrs_compile_command (pcrs.c:602) ==24137== by 0x276B9A: pcrs_execute_single_command (pcrs.c:1174) ==24137== by 0x24AA13: rewrite_url (filters.c:1007) ==24137== by 0x24ACF6: redirect_url (filters.c:1257) ==24137== by 0x2583B4: crunch_response_triggered (jcc.c:953) ==24137== by 0x2569D5: chat (jcc.c:4482) ==24137== by 0x255735: serve (jcc.c:5056) ==24137== by 0x4CD5A79: ??? 
(in /lib/libthr.so.3) --- pcrs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 1705306e..4860517d 100644 --- a/pcrs.c +++ b/pcrs.c @@ -471,7 +471,14 @@ pcrs_job *pcrs_free_job(pcrs_job *job) else { next = job->next; - if (job->pattern != NULL) free(job->pattern); + if (job->pattern != NULL) + { +#ifdef HAVE_PCRE2 + pcre2_code_free(job->pattern); +#else + free(job->pattern); +#endif + } #ifndef HAVE_PCRE2 if (job->hints != NULL) { -- 2.40.1 From 4e182f7461f2a4076be0daf0bb576d8e1307ef9f Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 19 Jun 2023 11:12:36 +0200 Subject: [PATCH 22/34] pcrs_compile(): Pass a proper PCRE_SIZE variable to pcre2_compile() Prevents stack corruption like: (gdb) where #0 kill () at kill.S:4 #1 0x00000008008926e0 in __fail (msg=0x80079c824 "stack overflow detected; terminated") at /usr/src/lib/libc/secure/stack_protector.c:130 #2 0x0000000800892650 in __stack_chk_fail () at /usr/src/lib/libc/secure/stack_protector.c:137 #3 0x000000000024abed in rewrite_url (old_url=0x801e7b280 "https://slashdot.org/story/23/06/18/0332230/what-happens-when-you-ask-alexa-if-amazon-is-a-monopoly?utm_source=rss1.0mainlinkanon&utm_medium=feed", pcrs_command=0x801a00180 "s@\\?(utm_source=rss1.0)?(mainlinkanon)?&utm_medium=feed@@") at filters.c:1038 #4 0x000000000024acf7 in redirect_url (csp=0x800ef1008) at filters.c:1257 #5 0x00000000002583b5 in crunch_response_triggered (csp=0x800ef1008, crunchers=0x218920 ) at jcc.c:953 #6 0x00000000002569d6 in chat (csp=0x800ef1008) at jcc.c:4482 #7 0x0000000000255736 in serve (csp=0x800ef1008) at jcc.c:5056 #8 0x000000080073ca7a in thread_start (curthread=0x800e12700) at /usr/src/lib/libthr/thread/thr_create.c:292 #9 0x0000000000000000 in ?? 
() Backtrace stopped: Cannot access memory at address 0x7fffdfffe000 --- pcrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 4860517d..05637217 100644 --- a/pcrs.c +++ b/pcrs.c @@ -679,9 +679,10 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * */ #ifdef HAVE_PCRE2 int errcode; + PCRE2_SIZE error_offset; newjob->pattern = pcre2_compile((const unsigned char *)pattern, PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, &errcode, - (PCRE2_SIZE*)errptr, NULL); + &error_offset, NULL); pcre2_get_error_message(errcode, errorstr, sizeof(errorstr)); #else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); -- 2.40.1 From f94ab3cda554f4abf24a250d5e1830548f79c3ff Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 19 Jun 2023 13:57:26 +0200 Subject: [PATCH 23/34] pcrs_strerror(): Ditch trailing white-space. Squash --- pcrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 05637217..317539d2 100644 --- a/pcrs.c +++ b/pcrs.c @@ -89,7 +89,7 @@ const char *pcrs_strerror(const int error) case PCREn(ERROR_NULL): return "(pcre:) NULL code or subject or ovector"; case PCREn(ERROR_BADOPTION): return "(pcre:) Unrecognized option bit"; case PCREn(ERROR_BADMAGIC): return "(pcre:) Bad magic number in code"; -#if defined(PCRE_ERROR_UNKNOWN_NODE) +#if defined(PCRE_ERROR_UNKNOWN_NODE) case PCRE_ERROR_UNKNOWN_NODE: return "(pcre:) Bad node in pattern"; #endif /* Can't happen / not passed: */ -- 2.40.1 From a812b49f09527aa7536795ebdae3afc4cf22ab77 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Mon, 19 Jun 2023 14:53:45 +0200 Subject: [PATCH 24/34] pcrs_execute(): Use pcre2_match_data_create_from_pattern() instead of pcre2_match_data_create() --- pcrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 317539d2..429dd02a 100644 --- a/pcrs.c +++ b/pcrs.c @@ -875,7 +875,7 @@ int pcrs_execute(pcrs_job *job, const 
char *subject, size_t subject_length, char } #ifdef HAVE_PCRE2 - if (NULL == (pcre2_matches = pcre2_match_data_create((size_t)PCRS_MAX_SUBMATCHES, NULL))) + if (NULL == (pcre2_matches = pcre2_match_data_create_from_pattern(job->pattern, NULL))) { return(PCRS_ERR_NOMEM); } -- 2.40.1 From 882329b938b024e1a7a3f400d0ad351753b86652 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 20 Jun 2023 11:46:26 +0200 Subject: [PATCH 25/34] pcrs_compile(): Set errptr so the caller can figure out the type of compile errors --- pcrs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pcrs.c b/pcrs.c index 429dd02a..3b6cfb1e 100644 --- a/pcrs.c +++ b/pcrs.c @@ -678,10 +678,9 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * * Compile the pattern */ #ifdef HAVE_PCRE2 - int errcode; PCRE2_SIZE error_offset; newjob->pattern = pcre2_compile((const unsigned char *)pattern, - PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, &errcode, + PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr, &error_offset, NULL); pcre2_get_error_message(errcode, errorstr, sizeof(errorstr)); #else -- 2.40.1 From c41e67df93cdb63604e585d05057e2b57f09dadc Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 20 Jun 2023 11:52:27 +0200 Subject: [PATCH 26/34] Move pcre2_get_error_message() call to pcrs_strerror() so pcre2 errors are properly explained --- pcrs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pcrs.c b/pcrs.c index 3b6cfb1e..81788a15 100644 --- a/pcrs.c +++ b/pcrs.c @@ -118,9 +118,13 @@ const char *pcrs_strerror(const int error) * PCRE version all bets are off ... */ default: +#ifdef HAVE_PCRE2 + pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf)); +#else snprintf(buf, sizeof(buf), "Error code %d. 
For details, check the pcre documentation.", error); +#endif return buf; } } @@ -643,9 +647,7 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * int pcre_study_options = 0; #endif *errptr = 0; -#ifdef HAVE_PCRE2 - unsigned char errorstr[128]; -#else +#ifndef HAVE_PCRE2 const char *error; #endif @@ -682,7 +684,6 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * newjob->pattern = pcre2_compile((const unsigned char *)pattern, PCRE2_ZERO_TERMINATED, (unsigned)newjob->options, errptr, &error_offset, NULL); - pcre2_get_error_message(errcode, errorstr, sizeof(errorstr)); #else newjob->pattern = pcre_compile(pattern, newjob->options, &error, errptr, NULL); #endif -- 2.40.1 From e9bc24ea5730cf9213e1831b32939acebd3e2181 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 20 Jun 2023 11:55:29 +0200 Subject: [PATCH 27/34] pcrs_strerror(): Remove obsolete comment --- pcrs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pcrs.c b/pcrs.c index 81788a15..063585f2 100644 --- a/pcrs.c +++ b/pcrs.c @@ -111,12 +111,6 @@ const char *pcrs_strerror(const int error) case PCRS_WARN_TRUNCATION: return "(pcrs:) At least one variable was too big and has been truncated before compilation"; - /* - * XXX: With the exception of PCRE_ERROR_MATCHLIMIT we - * only catch PCRE errors that can happen with our internal - * version. If Privoxy is linked against a newer - * PCRE version all bets are off ... 
- */ default: #ifdef HAVE_PCRE2 pcre2_get_error_message(error, (PCRE2_UCHAR8*)buf, sizeof(buf)); -- 2.40.1 From 6ba852754af2556437edf316e4666b24f18ee22c Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 20 Jun 2023 12:07:00 +0200 Subject: [PATCH 28/34] pcrs_compile(): Remove pointless comment --- pcrs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 063585f2..5f75446d 100644 --- a/pcrs.c +++ b/pcrs.c @@ -696,7 +696,6 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * #ifdef HAVE_PCRE2 pcre_study_options = PCRE2_JIT_COMPLETE; #else - // pcre_study no longer exists in pcre2 pcre_study_options = PCRE_STUDY_JIT_COMPILE; #endif } -- 2.40.1 From 237d85c57399282807c87db31e49e25e4b78e955 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 20 Jun 2023 12:51:50 +0200 Subject: [PATCH 29/34] pcrs_execute(): When using pcre2 malloc more data than we need to work around invalid reads ... in the jit code like: ==70430== Invalid read of size 16 ==70430== at 0x482D397: ??? ==70430== by 0x79FEBBF: ??? ==70430== Address 0x7a0323f is 18,047 bytes inside a block of size 18,055 alloc'd ==70430== at 0x484CBC4: malloc (in /usr/local/libexec/valgrind/vgpreload_memcheck-amd64-freebsd.so) ==70430== by 0x276594: pcrs_execute (pcrs.c:967) ==70430== by 0x24E6E4: pcrs_filter_impl (filters.c:1656) ==70430== by 0x24E2A1: pcrs_filter_response_body (filters.c:1768) ==70430== by 0x24BAD7: execute_content_filters (filters.c:2561) ==70430== by 0x2597C6: handle_established_connection (jcc.c:3597) ==70430== by 0x25771C: chat (jcc.c:4870) ==70430== by 0x255755: serve (jcc.c:5056) ==70430== by 0x4CD5A79: ??? (in /lib/libthr.so.3) ==70430== by 0x1FDB1F9FFF: ??? 
--- pcrs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pcrs.c b/pcrs.c index 5f75446d..8a7ea3e7 100644 --- a/pcrs.c +++ b/pcrs.c @@ -964,7 +964,14 @@ int pcrs_execute(pcrs_job *job, const char *subject, size_t subject_length, char * Get memory for the result (must be freed by caller!) * and append terminating null byte. */ - if ((*result = (char *)malloc(newsize + 1)) == NULL) + if ((*result = (char *)malloc(newsize + 1 +#ifdef HAVE_PCRE2 + /* + * Work around to prevent invalid reads in the jit code. + */ + + 16 +#endif + )) == NULL) { free(matches); #ifdef HAVE_PCRE2 -- 2.40.1 From d626c141f41670c9596630f48990cbff0f04631e Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Thu, 22 Jun 2023 11:47:11 +0200 Subject: [PATCH 30/34] pcrs_compile: Continue if JIT compilation isn't supported by pcre2 While at it, remove the pcre_study_options variable for the pcre2 code. --- pcrs.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pcrs.c b/pcrs.c index 8a7ea3e7..8be52f19 100644 --- a/pcrs.c +++ b/pcrs.c @@ -636,7 +636,7 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * unsigned int flags; int capturecount; #ifdef HAVE_PCRE2 - unsigned pcre_study_options = 0; + int ret; #else int pcre_study_options = 0; #endif @@ -694,7 +694,14 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * if (!(flags & PCRS_DYNAMIC)) { #ifdef HAVE_PCRE2 - pcre_study_options = PCRE2_JIT_COMPLETE; + /* Try to enable JIT compilation but continue if it's unsupported. */ + if ((ret = pcre2_jit_compile(newjob->pattern, PCRE2_JIT_COMPLETE)) && + (ret != PCRE2_ERROR_JIT_BADOPTION)) + { + *errptr = ret; + pcrs_free_job(newjob); + return NULL; + } #else pcre_study_options = PCRE_STUDY_JIT_COMPILE; #endif @@ -702,22 +709,19 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * #endif #endif +#ifndef HAVE_PCRE2 /* * Generate hints. 
This has little overhead, since the * hints will be NULL for a boring pattern anyway. */ -#ifdef HAVE_PCRE2 - if ((pcre_study_options == PCRE2_JIT_COMPLETE) && - pcre2_jit_compile(newjob->pattern, pcre_study_options)) -#else newjob->hints = pcre_study(newjob->pattern, pcre_study_options, &error); if (error != NULL) -#endif { *errptr = PCRS_ERR_STUDY; pcrs_free_job(newjob); return NULL; } +#endif /* * Determine the number of capturing subpatterns. -- 2.40.1 From 5ef537b09acc23eb61a9965ac7682f3b5e8de3ed Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Thu, 22 Jun 2023 12:02:44 +0200 Subject: [PATCH 31/34] pcrs_compile(): Move variable initialisation out of declaration block --- pcrs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pcrs.c b/pcrs.c index 8be52f19..975fbdaa 100644 --- a/pcrs.c +++ b/pcrs.c @@ -639,12 +639,11 @@ pcrs_job *pcrs_compile(const char *pattern, const char *substitute, const char * int ret; #else int pcre_study_options = 0; -#endif - *errptr = 0; -#ifndef HAVE_PCRE2 const char *error; #endif + *errptr = 0; + /* * Handle NULL arguments */ -- 2.40.1 From a8bc9d6282e725d7536ab9aab21c2d90315985fb Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Sun, 25 Jun 2023 19:09:16 +0200 Subject: [PATCH 32/34] When using pcre2, consistently use the pcre2_match() interface ... instead of the POSIX api (regexec() etc.) to match strings. Seems to work around crashes that previously could be triggered with: privoxy-regression-test.pl --forks 1. 
One known downside is that: curl -v 'http://p.p/show-url-info?url=http%3A%2F%2Fl.yimg.com%2Fg%2Fcombo%2F1?event-custom%2Fevent-custom-min.js%26event%2Fevent-min.js%26j%2F.H-.K.A.vNKEa%26j%2F.CP-.U-.DE.A.vKEJz%26j%2F.J_.BR_.CA.A.vKYkg%26j%2F.J_.DB.A.vPpBT%26j%2Fpopup-login.A.vR53Z%26dump%2Fdump-min.js%26datatype%2Fdatatype-xml-min.js%26substitute%2Fsubstitute-min.js%26json%2Fjson-min.js%26queue-promote%2Fqueue-promote-min.js%26io%2Fio-min.js%26j%2F.J_.DS.A.vQa28%26j%2F.FW-.FX-.GH.A.vP3XB%26j%2Fgrease.A.vRktP%26j%2F.CC.A.vNiA6%26j%2F.C-.BL.A.vPPj2%26j%2F.CE-.K.A.vNy32%26attribute%2Fattribute-base-min.js%26base%2Fbase-min.js%26anim%2Fanim-min.js%26cookie%2Fcookie-min.js%26j%2F.B-.C-.F.A.vQ7SZ%26j%2Furls.A.vQtXp%26j%2F.B-.BY.A.vQCXP%26j%2F.H-.BY.A.vQXXx%26j%2F.DS-value-conversions.A.vQpRt%26j%2F.G-.BD.A.vNHSH%26event%2Fevent-synthetic-min.js%26j%2F.G-.BO.A.vNwR4%26j%2F.CV-.CH.A.vPFSZ%26j%2F.X-.W-.C-.F.A.vKPQa%26j%2F.X-.W-.D.A.vQXXx%26j%2F.Q-.BX-.K.A.vR1kt%26j%2F.DL.A.vLPjD%26j%2F.CF.A.vNC24%26j%2F.CX-.CY.A.vP8ND%26event-simulate%2Fevent-simulate-min.js%26node%2Fnode-event-simulate-min.js%26j%2F.B-.T-.CI-.C-.F.A.vPJPF%26j%2F.CM%2F.BA_2.5.1-.D.A.vPzui%26j%2Fbo-.S-.C-.F.A.vNwWe%26j%2Fbo-.S-.D.A.vR6Hx%26j%2F.BZ-.D.A.vNstB%26j%2F.B-.L-.C-.F.A.vNxPX%26j%2F.B-.L-.BH.A.vMdVB%26j%2F.CN-.DD.A.vLjJ2%26j%2F.B-.O-.C-.F.A.vPpcK%26j%2F.BM.A.vKPmz%26j%2F.B-.O.A.vQyHg%26j%2F.B-.H-.BB-.C-.F.A.vQvrB%26j%2F.CW-.CU.A.vQ7Rg%26j%2F.Y-.C-.F.A.vNqGa%26j%2F.Y.A.vLKiT%26j%2F.B-.M-.C-.F.A.vQxDc%26j%2F.U-.CG.A.vQ5Tt%26j%2F.B-.M.A.vQXXx%26j%2F.B-.Q-.BQ.A.vQvTt%26j%2F.B-.N-.C-.F.A.vQaRp%26j%2F.CL.A.vN4N6%26j%2F.B-.CL-.BW.A.vPwkx%26j%2F.DR-.DG.A.vMLJr%26j%2F.B-.BE-.C-.F.A.vPHP4%26j%2F.B-.BE-.D.A.vQLQH%26j%2F.BV.A.vm3Uz%26j%2F.Z-.DK-.D.A.vLQEe%26j%2F.Z-.DJ-.BJ.A.vLQEe%26j%2F.B-.I-.C-.F.A.vPKTK%26stylesheet%2Fstylesheet-min.js%26j%2F.B-.I.A.vQvDF%26j%2F.CM-.DO.A.vPboD%26j%2F.B-.D.A.vRbv8%26j%2F.B-.H-.BB.A.vQuhn%26j%2F.B-.N.A.vR6Cn%26j%2F.B-.L-.CZ.A.vQmzP%26j%2F.B-.T-.CI.A.vQXXx%26j%2F.B-.I-.CQ-.BK-.C-.F
.A.vNwZF%26j%2F.B-.I-.CQ-.BK.A.vLWQR%26j%2F.B-.R-.C-.F.A.vPfwi%26j%2F.B-.R.A.vRhND%26j%2F.DN-.BB-.D-.C-.F.A.vQXZg%26j%2F.DN-.BB-.D.A.vRcXB%26j%2F.BF_.D-.C-.F.A.vPGYM%26j%2F.BF_.D.A.vQxJn%26plugin%2Fplugin-min.js%26cache%2Fcache-min.js%26j%2F.CB-.C-.F.A.vNwWe%26j%2F.CB-.D.A.vQS6T' ... now takes ~41 seconds on my system when using valgrind. --- actions.c | 8 +++ client-tags.c | 5 ++ project.h | 12 +++++ urlmatch.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ urlmatch.h | 4 ++ 5 files changed, 173 insertions(+) diff --git a/actions.c b/actions.c index ddcb6a8c..4523baa2 100644 --- a/actions.c +++ b/actions.c @@ -828,8 +828,12 @@ int update_action_bits_for_tag(struct client_state *csp, const char *tag) continue; } +#ifdef HAVE_PCRE2 + if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag)) +#else /* and check if one of the tag patterns matches the tag, */ if (0 == regexec(b->url->pattern.tag_regex, tag, 0, NULL, 0)) +#endif { /* if it does, update the action bit map, */ if (merge_current_action(csp->action, b->action)) @@ -884,7 +888,11 @@ jb_err check_negative_tag_patterns(struct client_state *csp, unsigned int flag) } for (tag = csp->tags->first; NULL != tag; tag = tag->next) { +#ifdef HAVE_PCRE2 + if (pcre2_pattern_matches(b->url->pattern.tag_regex, tag->str)) +#else if (0 == regexec(b->url->pattern.tag_regex, tag->str, 0, NULL, 0)) +#endif { /* * The pattern matches at least one tag, thus the action diff --git a/client-tags.c b/client-tags.c index 51e8a9c5..474e5695 100644 --- a/client-tags.c +++ b/client-tags.c @@ -43,6 +43,7 @@ #include "miscutil.h" #include "errlog.h" #include "parsers.h" +#include "urlmatch.h" struct client_specific_tag { @@ -658,7 +659,11 @@ int client_tag_match(const struct pattern_spec *pattern, for (tag = tags->first; tag != NULL; tag = tag->next) { +#ifdef HAVE_PCRE2 + if (pcre2_pattern_matches(pattern->pattern.tag_regex, tag->str)) +#else if (0 == regexec(pattern->pattern.tag_regex, tag->str, 0, NULL, 0)) +#endif { 
log_error(LOG_LEVEL_TAGGING, "Client tag '%s' matches.", tag->str); return 1; diff --git a/project.h b/project.h index df2e02bd..9de13dea 100644 --- a/project.h +++ b/project.h @@ -439,7 +439,11 @@ struct http_response struct url_spec { #ifdef FEATURE_PCRE_HOST_PATTERNS +#ifdef HAVE_PCRE2 + pcre2_code *host_regex;/**< Regex for host matching */ +#else regex_t *host_regex;/**< Regex for host matching */ +#endif enum host_regex_type { VANILLA_HOST_PATTERN, PCRE_HOST_PATTERN } host_regex_type; #endif /* defined FEATURE_PCRE_HOST_PATTERNS */ int dcount; /**< How many parts to this domain? (length of dvec) */ @@ -449,7 +453,11 @@ struct url_spec char *port_list; /**< List of acceptable ports, or NULL to match all ports */ +#ifdef HAVE_PCRE2 + pcre2_code *preg; /**< Regex for matching path part */ +#else regex_t *preg; /**< Regex for matching path part */ +#endif }; /** @@ -464,7 +472,11 @@ struct pattern_spec union { struct url_spec url_spec; +#ifdef HAVE_PCRE2 + pcre2_code *tag_regex; +#else regex_t *tag_regex; +#endif } pattern; unsigned int flags; /**< Bitmap with various pattern properties. */ diff --git a/urlmatch.c b/urlmatch.c index 5bd59de2..07a2ce48 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -604,6 +604,85 @@ jb_err parse_http_request(const char *req, struct http_request *http) } +#ifdef HAVE_PCRE2 +/********************************************************************* + * + * Function : compile_pattern + * + * Description : Compiles a host, domain or TAG pattern. + * + * Parameters : + * 1 : pattern = The pattern to compile. + * 2 : anchoring = How the regex should be modified + * before compilation. Can be either + * one of NO_ANCHORING, LEFT_ANCHORED, + * RIGHT_ANCHORED or RIGHT_ANCHORED_HOST. + * 3 : url = In case of failures, the spec member is + * logged and the structure freed. + * 4 : regex = Where the compiled regex should be stored. 
+ * + * Returns : JB_ERR_OK - Success + * JB_ERR_PARSE - Cannot parse regex + * + *********************************************************************/ +static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchoring, + struct pattern_spec *url, pcre2_code **regex) +{ + int errcode; + const char *fmt = NULL; + char *rebuf; + size_t rebuf_size; + PCRE2_SIZE error_offset; + + assert(pattern); + + if (pattern[0] == '\0') + { + *regex = NULL; + return JB_ERR_OK; + } + + switch (anchoring) + { + case NO_ANCHORING: + fmt = "%s"; + break; + case RIGHT_ANCHORED: + fmt = "%s$"; + break; + case RIGHT_ANCHORED_HOST: + fmt = "%s\\.?$"; + break; + case LEFT_ANCHORED: + fmt = "^%s"; + break; + default: + log_error(LOG_LEVEL_FATAL, + "Invalid anchoring in compile_pattern %d", anchoring); + } + rebuf_size = strlen(pattern) + strlen(fmt); + rebuf = malloc_or_die(rebuf_size); + + snprintf(rebuf, rebuf_size, fmt, pattern); + + *regex = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, PCRE2_CASELESS, &errcode, + &error_offset, NULL); + if (*regex == NULL) + { + log_error(LOG_LEVEL_ERROR, "error compiling %s from %s: %s", + pattern, url->spec, rebuf); + freez(rebuf); + + return JB_ERR_PARSE; + } + + freez(rebuf); + + return JB_ERR_OK; + +} +#else /********************************************************************* * * Function : compile_pattern @@ -686,6 +765,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin return JB_ERR_OK; } +#endif /********************************************************************* @@ -1051,6 +1131,49 @@ static int simplematch(const char *pattern, const char *text) } +#ifdef HAVE_PCRE2 +/********************************************************************* + * + * Function : pcre2_pattern_matches + * + * Description : Checks if a compiled pcre2 pattern matches a string. 
+ * + * Parameters : + * 1 : pattern = The compiled pattern + * 2 : string = The string to check + * + * Returns : TRUE for yes, FALSE otherwise. + * + *********************************************************************/ +int pcre2_pattern_matches(const pcre2_code *pattern, const char *string) +{ + PCRE2_SIZE offset; + int ret; + pcre2_match_data *pcre2_matches; + + assert(pattern != NULL); + assert(string != NULL); + + offset = 0; + + pcre2_matches = pcre2_match_data_create_from_pattern(pattern, NULL); + if (NULL == pcre2_matches) + { + log_error(LOG_LEVEL_ERROR, + "Out of memory while matching pattern against %s", string); + return FALSE; + } + + ret = pcre2_match(pattern, (const unsigned char *)string, strlen(string), + offset, 0, pcre2_matches, NULL); + + pcre2_match_data_free(pcre2_matches); + + return (ret >= 0); +} +#endif + + /********************************************************************* * * Function : simple_domaincmp @@ -1268,8 +1391,12 @@ void free_pattern_spec(struct pattern_spec *pattern) { if (pattern->pattern.tag_regex) { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.tag_regex); +#else regfree(pattern->pattern.tag_regex); freez(pattern->pattern.tag_regex); +#endif } return; } @@ -1277,8 +1404,12 @@ void free_pattern_spec(struct pattern_spec *pattern) #ifdef FEATURE_PCRE_HOST_PATTERNS if (pattern->pattern.url_spec.host_regex) { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.url_spec.host_regex); +#else regfree(pattern->pattern.url_spec.host_regex); freez(pattern->pattern.url_spec.host_regex); +#endif } #endif /* def FEATURE_PCRE_HOST_PATTERNS */ freez(pattern->pattern.url_spec.dbuffer); @@ -1287,8 +1418,12 @@ void free_pattern_spec(struct pattern_spec *pattern) freez(pattern->pattern.url_spec.port_list); if (pattern->pattern.url_spec.preg) { +#ifdef HAVE_PCRE2 + pcre2_code_free(pattern->pattern.url_spec.preg); +#else regfree(pattern->pattern.url_spec.preg); freez(pattern->pattern.url_spec.preg); +#endif } } @@ -1333,8 +1468,13 
@@ static int host_matches(const struct http_request *http, if (pattern->pattern.url_spec.host_regex_type == PCRE_HOST_PATTERN) { return ((NULL == pattern->pattern.url_spec.host_regex) +#ifdef HAVE_PCRE2 + || pcre2_pattern_matches(pattern->pattern.url_spec.host_regex, + http->host)); +#else || (0 == regexec(pattern->pattern.url_spec.host_regex, http->host, 0, NULL, 0))); +#endif } #endif return ((NULL == pattern->pattern.url_spec.dbuffer) || (0 == domain_match(pattern, http))); @@ -1357,7 +1497,11 @@ static int host_matches(const struct http_request *http, static int path_matches(const char *path, const struct pattern_spec *pattern) { return ((NULL == pattern->pattern.url_spec.preg) +#ifdef HAVE_PCRE2 + || (pcre2_pattern_matches(pattern->pattern.url_spec.preg, path))); +#else || (0 == regexec(pattern->pattern.url_spec.preg, path, 0, NULL, 0))); +#endif } diff --git a/urlmatch.h b/urlmatch.h index 315e8b24..8643aa4e 100644 --- a/urlmatch.h +++ b/urlmatch.h @@ -50,6 +50,10 @@ extern int url_requires_percent_encoding(const char *url); extern int url_match(const struct pattern_spec *pattern, const struct http_request *http); +#ifdef HAVE_PCRE2 +extern int pcre2_pattern_matches(const pcre2_code *pattern, const char *string); +#endif + extern jb_err create_pattern_spec(struct pattern_spec *url, char *buf); extern void free_pattern_spec(struct pattern_spec *url); extern int match_portlist(const char *portlist, int port); -- 2.40.1 From ba34f3878d3e0ecb08ca38879156090315305af8 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 27 Jun 2023 17:14:37 +0200 Subject: [PATCH 33/34] compile_pattern(): Enable JIT compilation when using pcre2 ... unless DISABLE_PCRE_JIT_COMPILATION is active. So far I haven't been able to measure a clear performance improvement but it doesn't seem to hurt either. 
--- urlmatch.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/urlmatch.c b/urlmatch.c index 07a2ce48..fdc4121a 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -633,6 +633,7 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin char *rebuf; size_t rebuf_size; PCRE2_SIZE error_offset; + int ret; assert(pattern); @@ -677,6 +678,20 @@ static jb_err compile_pattern(const char *pattern, enum regex_anchoring anchorin return JB_ERR_PARSE; } +#ifndef DISABLE_PCRE_JIT_COMPILATION + /* Try to enable JIT compilation but continue if it's unsupported. */ + if ((ret = pcre2_jit_compile(*regex, PCRE2_JIT_COMPLETE)) && + (ret != PCRE2_ERROR_JIT_BADOPTION)) + { + log_error(LOG_LEVEL_ERROR, + "Unexpected error enabling JIT compilation for %s from %s: %s", + pattern, url->spec, rebuf); + freez(rebuf); + + return JB_ERR_PARSE; + } +#endif + freez(rebuf); return JB_ERR_OK; -- 2.40.1 From 18db5286ae27829a52c0bc7fb42acf5a9f4dd5c1 Mon Sep 17 00:00:00 2001 From: Fabian Keil Date: Tue, 27 Jun 2023 17:33:06 +0200 Subject: [PATCH 34/34] configure.in: Add --disable-pcre2 option to try to use pcre1 even if pcre2 is available --- configure.in | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/configure.in b/configure.in index e6e5acf3..7418b48b 100644 --- a/configure.in +++ b/configure.in @@ -869,6 +869,18 @@ else ]) fi +AC_ARG_ENABLE(pcre2, +[ --disable-pcre2 Don't try to use pcre2 even if it's available], +[enableval2=$enableval], +[enableval2=yes]) +if test $enableval2 = yes; then + try_pcre2=yes +else + AC_MSG_WARN([Ignoring pcre2 even if it's available]) + try_pcre2=no +fi + +if test $try_pcre2 != no; then dnl ================================================================= dnl Checks for libraries. dnl ================================================================= @@ -894,6 +906,7 @@ AC_CHECK_LIB(pcre2-posix, regcomp, [ ], [have_pcre2posix=no]) ]) ], [have_pcre2posix=no], -lpcre2-8) +fi if test $have_pcre2 = "no"; then -- 2.40.1