From 1ed943ec556f4bfa49a2700b30c9bb58a91379eb Mon Sep 17 00:00:00 2001
From: Remi Collet <fedora@famillecollet.com>
Date: Tue, 16 Feb 2016 22:54:26 +0100
Subject: php 5.4.45-4 (security fix backported from 5.5.32)

---
 pcre838.patch | 6665 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 6665 insertions(+)
 create mode 100644 pcre838.patch

(limited to 'pcre838.patch')

diff --git a/pcre838.patch b/pcre838.patch
new file mode 100644
index 0000000..3b3dcdf
--- /dev/null
+++ b/pcre838.patch
@@ -0,0 +1,6665 @@
+Backported from 5.5 for 5.4 by Remi Collet
+
+
+diff -ru php-5.4.45/ext/pcre/pcrelib/config.h php55/php-5.5.31/ext/pcre/pcrelib/config.h
+--- php-5.4.45/ext/pcre/pcrelib/config.h	2015-09-01 22:09:37.000000000 +0200
++++ php-5.5.31/ext/pcre/pcrelib/config.h	2016-01-06 10:36:49.000000000 +0100
+@@ -302,6 +302,8 @@
+    */
+ /* #undef NO_RECURSE */
+ 
++#define PARENS_NEST_LIMIT 250
++
+ /* Name of package */
+ #define PACKAGE "pcre"
+ 
+diff -ru php54/php-5.4.45/ext/pcre/pcrelib/pcre_exec.c php55/php-5.5.31/ext/pcre/pcrelib/pcre_exec.c
+--- php-5.4.45/ext/pcre/pcrelib/pcre_exec.c	2015-09-01 22:09:37.000000000 +0200
++++ php-5.5.31/ext/pcre/pcrelib/pcre_exec.c	2016-01-06 10:36:49.000000000 +0100
+@@ -688,7 +688,7 @@
+ #define foc           number
+ #define save_mark     data
+ 
+-/* These statements are here to stop the compiler complaining about unitialized
++/* These statements are here to stop the compiler complaining about uninitialized
+ variables. */
+ 
+ #ifdef SUPPORT_UCP
+
+From ca02d9c2d6f9bea7bf8abe607f1ee9484b1d7b62 Mon Sep 17 00:00:00 2001
+From: Stanislav Malyshev <stas@php.net>
+Date: Sun, 31 Jan 2016 20:33:17 -0800
+Subject: [PATCH] Upgrade bundled PCRE to 8.38
+
+---
+ NEWS                                             |    3 +
+ ext/pcre/pcrelib/ChangeLog                       |  176 ++
+ ext/pcre/pcrelib/NEWS                            |    8 +
+ ext/pcre/pcrelib/config.h                        |   11 +-
+ ext/pcre/pcrelib/doc/pcre.txt                    | 2130 +++++++++++-----------
+ ext/pcre/pcrelib/pcre.h                          |    4 +-
+ ext/pcre/pcrelib/pcre_compile.c                  |  334 +++-
+ ext/pcre/pcrelib/pcre_exec.c                     |    5 +-
+ ext/pcre/pcrelib/pcre_internal.h                 |   17 +-
+ ext/pcre/pcrelib/pcre_jit_compile.c              |   77 +-
+ ext/pcre/pcrelib/pcre_study.c                    |   19 +-
+ ext/pcre/pcrelib/pcre_xclass.c                   |    2 +-
+ ext/pcre/pcrelib/sljit/sljitConfig.h             |    9 +
+ ext/pcre/pcrelib/sljit/sljitConfigInternal.h     |   13 +-
+ ext/pcre/pcrelib/sljit/sljitLir.c                |   10 +-
+ ext/pcre/pcrelib/sljit/sljitLir.h                |  128 +-
+ ext/pcre/pcrelib/sljit/sljitNativeARM_32.c       |   27 +-
+ ext/pcre/pcrelib/sljit/sljitNativeARM_64.c       |   48 +-
+ ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c    |   58 +-
+ ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c  |   15 +-
+ ext/pcre/pcrelib/sljit/sljitNativePPC_common.c   |   23 +-
+ ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c |   19 +-
+ ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c    |  311 ++--
+ ext/pcre/pcrelib/sljit/sljitNativeX86_common.c   |  129 +-
+ ext/pcre/pcrelib/testdata/grepoutput             |   12 +
+ ext/pcre/pcrelib/testdata/testinput1             |   13 +
+ ext/pcre/pcrelib/testdata/testinput11            |    4 +
+ ext/pcre/pcrelib/testdata/testinput12            |   17 +
+ ext/pcre/pcrelib/testdata/testinput14            |    2 +
+ ext/pcre/pcrelib/testdata/testinput17            |    2 +
+ ext/pcre/pcrelib/testdata/testinput2             |  139 ++
+ ext/pcre/pcrelib/testdata/testinput4             |    5 +
+ ext/pcre/pcrelib/testdata/testinput5             |    8 +
+ ext/pcre/pcrelib/testdata/testinput6             |   57 +
+ ext/pcre/pcrelib/testdata/testinput7             |   15 +
+ ext/pcre/pcrelib/testdata/testinput8             |    4 +
+ ext/pcre/pcrelib/testdata/testinputEBC           |    3 +
+ ext/pcre/pcrelib/testdata/testoutput1            |   23 +
+ ext/pcre/pcrelib/testdata/testoutput11-16        |   50 +-
+ ext/pcre/pcrelib/testdata/testoutput11-32        |   50 +-
+ ext/pcre/pcrelib/testdata/testoutput11-8         |   50 +-
+ ext/pcre/pcrelib/testdata/testoutput12           |   25 +
+ ext/pcre/pcrelib/testdata/testoutput14           |    2 +
+ ext/pcre/pcrelib/testdata/testoutput17           |    2 +
+ ext/pcre/pcrelib/testdata/testoutput2            |  380 +++-
+ ext/pcre/pcrelib/testdata/testoutput4            |    6 +
+ ext/pcre/pcrelib/testdata/testoutput5            |   45 +
+ ext/pcre/pcrelib/testdata/testoutput6            |   96 +
+ ext/pcre/pcrelib/testdata/testoutput7            |   57 +-
+ ext/pcre/pcrelib/testdata/testoutput8            |    6 +
+ ext/pcre/pcrelib/testdata/testoutputEBC          |    6 +
+ 51 files changed, 3144 insertions(+), 1511 deletions(-)
+
+diff --git a/ext/pcre/pcrelib/ChangeLog b/ext/pcre/pcrelib/ChangeLog
+index 359b412..5e5bf18 100644
+--- a/ext/pcre/pcrelib/ChangeLog
++++ b/ext/pcre/pcrelib/ChangeLog
+@@ -1,6 +1,182 @@
+ ChangeLog for PCRE
+ ------------------
+ 
++Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
++development is happening in the PCRE2 10.xx series.
++
++Version 8.38 23-November-2015
++-----------------------------
++
++1.  If a group that contained a recursive back reference also contained a
++    forward reference subroutine call followed by a non-forward-reference
++    subroutine call, for example /.((?2)(?R)\1)()/, pcre_compile() failed to
++    compile correct code, leading to undefined behaviour or an internally
++    detected error. This bug was discovered by the LLVM fuzzer.
++
++2.  Quantification of certain items (e.g. atomic back references) could cause
++    incorrect code to be compiled when recursive forward references were
++    involved. For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/.
++    This bug was discovered by the LLVM fuzzer.
++
++3.  A repeated conditional group whose condition was a reference by name caused
++    a buffer overflow if there was more than one group with the given name.
++    This bug was discovered by the LLVM fuzzer.
++
++4.  A recursive back reference by name within a group that had the same name as
++    another group caused a buffer overflow. For example:
++    /(?J)(?'d'(?'d'\g{d}))/. This bug was discovered by the LLVM fuzzer.
++
++5.  A forward reference by name to a group whose number is the same as the
++    current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused
++    a buffer overflow at compile time. This bug was discovered by the LLVM
++    fuzzer.
++
++6.  A lookbehind assertion within a set of mutually recursive subpatterns could
++    provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
++
++7.  Another buffer overflow bug involved duplicate named groups with a
++    reference between their definition, with a group that reset capture
++    numbers, for example: /(?J:(?|(?'R')(\k'R')|((?'R'))))/. This has been
++    fixed by always allowing for more memory, even if not needed. (A proper fix
++    is implemented in PCRE2, but it involves more refactoring.)
++
++8.  There was no check for integer overflow in subroutine calls such as (?123).
++
++9.  The table entry for \l in EBCDIC environments was incorrect, leading to its
++    being treated as a literal 'l' instead of causing an error.
++
++10. There was a buffer overflow if pcre_exec() was called with an ovector of
++    size 1. This bug was found by american fuzzy lop.
++
++11. If a non-capturing group containing a conditional group that could match
++    an empty string was repeated, it was not identified as matching an empty
++    string itself. For example: /^(?:(?(1)x|)+)+$()/.
++
++12. In an EBCDIC environment, pcretest was mishandling the escape sequences
++    \a and \e in test subject lines.
++
++13. In an EBCDIC environment, \a in a pattern was converted to the ASCII
++    instead of the EBCDIC value.
++
++14. The handling of \c in an EBCDIC environment has been revised so that it is
++    now compatible with the specification in Perl's perlebcdic page.
++
++15. The EBCDIC character 0x41 is a non-breaking space, equivalent to 0xa0 in
++    ASCII/Unicode. This has now been added to the list of characters that are
++    recognized as white space in EBCDIC.
++
++16. When PCRE was compiled without UCP support, the use of \p and \P gave an
++    error (correctly) when used outside a class, but did not give an error
++    within a class.
++
++17. \h within a class was incorrectly compiled in EBCDIC environments.
++
++18. A pattern with an unmatched closing parenthesis that contained a backward
++    assertion which itself contained a forward reference caused buffer
++    overflow. An example pattern is: /(?=di(?<=(?1))|(?=(.))))/.
++
++19. JIT should return with error when the compiled pattern requires more stack
++    space than the maximum.
++
++20. A possessively repeated conditional group that could match an empty string,
++    for example, /(?(R))*+/, was incorrectly compiled.
++
++21. Fix infinite recursion in the JIT compiler when certain patterns such as
++    /(?:|a|){100}x/ are analysed.
++
++22. Some patterns with character classes involving [: and \\ were incorrectly
++    compiled and could cause reading from uninitialized memory or an incorrect
++    error diagnosis.
++
++23. Pathological patterns containing many nested occurrences of [: caused
++    pcre_compile() to run for a very long time.
++
++24. A conditional group with only one branch has an implicit empty alternative
++    branch and must therefore be treated as potentially matching an empty
++    string.
++
++25. If (?R was followed by - or + incorrect behaviour happened instead of a
++    diagnostic.
++
++26. Arrange to give up on finding the minimum matching length for overly
++    complex patterns.
++
++27. Similar to (4) above: in a pattern with duplicated named groups and an
++    occurrence of (?| it is possible for an apparently non-recursive back
++    reference to become recursive if a later named group with the relevant
++    number is encountered. This could lead to a buffer overflow. Wen Guanxing
++    from Venustech ADLAB discovered this bug.
++
++28. If pcregrep was given the -q option with -c or -l, or when handling a
++    binary file, it incorrectly wrote output to stdout.
++
++29. The JIT compiler did not restore the control verb head in case of *THEN
++    control verbs. This issue was found by Karl Skomski with a custom LLVM
++    fuzzer.
++
++30. Error messages for syntax errors following \g and \k were giving inaccurate
++    offsets in the pattern.
++
++31. Added a check for integer overflow in conditions (?(<digits>) and
++    (?(R<digits>). This omission was discovered by Karl Skomski with the LLVM
++    fuzzer.
++
++32. Handling recursive references such as (?2) when the reference is to a group
++    later in the pattern uses code that is very hacked about and error-prone.
++    It has been re-written for PCRE2. Here in PCRE1, a check has been added to
++    give an internal error if it is obvious that compiling has gone wrong.
++
++33. The JIT compiler should not check repeats after a {0,1} repeat byte code.
++    This issue was found by Karl Skomski with a custom LLVM fuzzer.
++
++34. The JIT compiler should restore the control chain for empty possessive
++    repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer.
++
++35. Match limit check added to JIT recursion. This issue was found by Karl
++    Skomski with a custom LLVM fuzzer.
++
++36. Yet another case similar to 27 above has been circumvented by an
++    unconditional allocation of extra memory. This issue is fixed "properly" in
++    PCRE2 by refactoring the way references are handled. Wen Guanxing
++    from Venustech ADLAB discovered this bug.
++
++37. Fix two assertion fails in JIT. These issues were found by Karl Skomski
++    with a custom LLVM fuzzer.
++
++38. Fixed a corner case of range optimization in JIT.
++
++39. An incorrect error "overran compiling workspace" was given if there were
++    exactly enough group forward references such that the last one extended
++    into the workspace safety margin. The next one would have expanded the
++    workspace. The test for overflow was not including the safety margin.
++
++40. A match limit issue is fixed in JIT which was found by Karl Skomski
++    with a custom LLVM fuzzer.
++
++41. Remove the use of /dev/null in testdata/testinput2, because it doesn't
++    work under Windows. (Why has it taken so long for anyone to notice?)
++
++42. In a character class such as [\W\p{Any}] where both a negative-type escape
++    ("not a word character") and a property escape were present, the property
++    escape was being ignored.
++
++43. Fix crash caused by very long (*MARK) or (*THEN) names.
++
++44. A sequence such as [[:punct:]b] that is, a POSIX character class followed
++    by a single ASCII character in a class item, was incorrectly compiled in
++    UCP mode. The POSIX class got lost, but only if the single character
++    followed it.
++
++45. [:punct:] in UCP mode was matching some characters in the range 128-255
++    that should not have been matched.
++
++46. If [:^ascii:] or [:^xdigit:] or [:^cntrl:] are present in a non-negated
++    class, all characters with code points greater than 255 are in the class.
++    When a Unicode property was also in the class (if PCRE_UCP is set, escapes
++    such as \w are turned into Unicode properties), wide characters were not
++    correctly handled, and could fail to match.
++
++
+ Version 8.37 28-April-2015
+ --------------------------
+ 
+diff --git a/ext/pcre/pcrelib/NEWS b/ext/pcre/pcrelib/NEWS
+index 064bf27..7e42dcb 100644
+--- a/ext/pcre/pcrelib/NEWS
++++ b/ext/pcre/pcrelib/NEWS
+@@ -1,6 +1,14 @@
+ News about PCRE releases
+ ------------------------
+ 
++Release 8.38 23-November-2015
++-----------------------------
++
++This is a bug-fix release. Note that this library (now called PCRE1) is now being
++maintained for bug fixes only. New projects are advised to use the new PCRE2
++libraries.
++
++
+ Release 8.37 28-April-2015
+ --------------------------
+ 
+diff --git a/ext/pcre/pcrelib/config.h b/ext/pcre/pcrelib/config.h
+index ba06a17..0f7a9f7 100644
+--- a/ext/pcre/pcrelib/config.h
++++ b/ext/pcre/pcrelib/config.h
+@@ -234,8 +234,8 @@ them both to 0; an emulation function will be used. */
+ #define LINK_SIZE 2
+ #endif
+ 
+-/* Define to the sub-directory in which libtool stores uninstalled libraries.
+-   */
++/* Define to the sub-directory where libtool stores uninstalled libraries. */
++/* This is ignored unless you are using libtool. */
+ #ifndef LT_OBJDIR
+ #define LT_OBJDIR ".libs/"
+ #endif
+@@ -314,7 +314,7 @@ them both to 0; an emulation function will be used. */
+ #define PACKAGE_NAME "PCRE"
+ 
+ /* Define to the full name and version of this package. */
+-#define PACKAGE_STRING "PCRE 8.37"
++#define PACKAGE_STRING "PCRE 8.38"
+ 
+ /* Define to the one symbol short name of this package. */
+ #define PACKAGE_TARNAME "pcre"
+@@ -323,7 +323,7 @@ them both to 0; an emulation function will be used. */
+ #define PACKAGE_URL ""
+ 
+ /* Define to the version of this package. */
+-#define PACKAGE_VERSION "8.37"
++#define PACKAGE_VERSION "8.38"
+ 
+ /* to make a symbol visible */
+ /* #undef PCRECPP_EXP_DECL */
+@@ -439,7 +439,7 @@ them both to 0; an emulation function will be used. */
+ 
+ /* Version number of package */
+ #ifndef VERSION
+-#define VERSION "8.37"
++#define VERSION "8.38"
+ #endif
+ 
+ /* Define to empty if `const' does not conform to ANSI C. */
+@@ -451,4 +451,3 @@ them both to 0; an emulation function will be used. */
+ 
+ /* Define to `unsigned int' if <sys/types.h> does not define. */
+ /* #undef size_t */
+-
+diff --git a/ext/pcre/pcrelib/doc/pcre.txt b/ext/pcre/pcrelib/doc/pcre.txt
+index ce27f4b..76a47c7 100644
+--- a/ext/pcre/pcrelib/doc/pcre.txt
++++ b/ext/pcre/pcrelib/doc/pcre.txt
+@@ -13,7 +13,18 @@ PCRE(3)                    Library Functions Manual                    PCRE(3)
+ 
+ 
+ NAME
+-       PCRE - Perl-compatible regular expressions
++       PCRE - Perl-compatible regular expressions (original API)
++
++PLEASE TAKE NOTE
++
++       This  document relates to PCRE releases that use the original API, with
++       library names libpcre, libpcre16, and libpcre32. January 2015  saw  the
++       first release of a new API, known as PCRE2, with release numbers start-
++       ing  at  10.00  and  library   names   libpcre2-8,   libpcre2-16,   and
++       libpcre2-32. The old libraries (now called PCRE1) are still being main-
++       tained for bug fixes,  but  there  will  be  no  new  development.  New
++       projects are advised to use the new PCRE2 libraries.
++
+ 
+ INTRODUCTION
+ 
+@@ -179,8 +190,8 @@ AUTHOR
+ 
+ REVISION
+ 
+-       Last updated: 08 January 2014
+-       Copyright (c) 1997-2014 University of Cambridge.
++       Last updated: 10 February 2015
++       Copyright (c) 1997-2015 University of Cambridge.
+ ------------------------------------------------------------------------------
+ 
+ 
+@@ -4989,7 +5000,8 @@ BACKSLASH
+        appearance  of non-printing characters, apart from the binary zero that
+        terminates a pattern, but when a pattern  is  being  prepared  by  text
+        editing,  it  is  often  easier  to  use  one  of  the following escape
+-       sequences than the binary character it represents:
++       sequences than the binary character it represents.  In an ASCII or Uni-
++       code environment, these escapes are as follows:
+ 
+          \a        alarm, that is, the BEL character (hex 07)
+          \cx       "control-x", where x is any ASCII character
+@@ -5005,55 +5017,67 @@ BACKSLASH
+          \x{hhh..} character with hex code hhh.. (non-JavaScript mode)
+          \uhhhh    character with hex code hhhh (JavaScript mode only)
+ 
+-       The precise effect of \cx on ASCII characters is as follows: if x is  a
+-       lower  case  letter,  it  is converted to upper case. Then bit 6 of the
++       The  precise effect of \cx on ASCII characters is as follows: if x is a
++       lower case letter, it is converted to upper case. Then  bit  6  of  the
+        character (hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A
+-       (A  is  41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes
+-       hex 7B (; is 3B). If the data item (byte or 16-bit value) following  \c
+-       has  a  value greater than 127, a compile-time error occurs. This locks
++       (A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and  \c;  becomes
++       hex  7B (; is 3B). If the data item (byte or 16-bit value) following \c
++       has a value greater than 127, a compile-time error occurs.  This  locks
+        out non-ASCII characters in all modes.
+ 
+-       The \c facility was designed for use with ASCII  characters,  but  with
+-       the  extension  to  Unicode it is even less useful than it once was. It
+-       is, however, recognized when PCRE is compiled  in  EBCDIC  mode,  where
+-       data  items  are always bytes. In this mode, all values are valid after
+-       \c. If the next character is a lower case letter, it  is  converted  to
+-       upper  case.  Then  the  0xc0  bits  of the byte are inverted. Thus \cA
+-       becomes hex 01, as in ASCII (A is C1), but because the  EBCDIC  letters
+-       are  disjoint,  \cZ becomes hex 29 (Z is E9), and other characters also
+-       generate different values.
+-
+-       After \0 up to two further octal digits are read. If  there  are  fewer
+-       than  two  digits,  just  those  that  are  present  are used. Thus the
+-       sequence \0\x\07 specifies two binary zeros followed by a BEL character
+-       (code  value 7). Make sure you supply two digits after the initial zero
++       When PCRE is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t gener-
++       ate the appropriate EBCDIC code values. The \c escape is  processed  as
++       specified for Perl in the perlebcdic document. The only characters that
++       are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^,  _,  or  ?.
++       Any  other  character  provokes a  compile-time error. The sequence \c@
++       encodes character code 0; the letters (in either case)  encode  charac-
++       ters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31
++       (hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
++
++       Thus, apart from \c?, these escapes generate the  same  character  code
++       values  as  they do in an ASCII environment, though the meanings of the
++       values mostly differ. For example, \cG always generates code  value  7,
++       which is BEL in ASCII but DEL in EBCDIC.
++
++       The  sequence \c?  generates DEL (127, hex 7F) in an ASCII environment,
++       but because 127 is not a control character in  EBCDIC,  Perl  makes  it
++       generate  the  APC character. Unfortunately, there are several variants
++       of EBCDIC. In most of them the APC character has  the  value  255  (hex
++       FF),  but  in  the one Perl calls POSIX-BC its value is 95 (hex 5F). If
++       certain other characters have POSIX-BC values, PCRE makes \c?  generate
++       95; otherwise it generates 255.
++
++       After  \0  up  to two further octal digits are read. If there are fewer
++       than two digits, just  those  that  are  present  are  used.  Thus  the
++       sequence \0\x\015 specifies two binary zeros followed by a CR character
++       (code value 13). Make sure you supply two digits after the initial zero
+        if the pattern character that follows is itself an octal digit.
+ 
+-       The escape \o must be followed by a sequence of octal digits,  enclosed
+-       in  braces.  An  error occurs if this is not the case. This escape is a
+-       recent addition to Perl; it provides way of specifying  character  code
+-       points  as  octal  numbers  greater than 0777, and it also allows octal
++       The  escape \o must be followed by a sequence of octal digits, enclosed
++       in braces. An error occurs if this is not the case. This  escape  is  a
++       recent addition to Perl; it provides a way of specifying character code
++       points as octal numbers greater than 0777, and  it  also  allows  octal
+        numbers and back references to be unambiguously specified.
+ 
+        For greater clarity and unambiguity, it is best to avoid following \ by
+        a digit greater than zero. Instead, use \o{} or \x{} to specify charac-
+-       ter numbers, and \g{} to specify back references. The  following  para-
++       ter  numbers,  and \g{} to specify back references. The following para-
+        graphs describe the old, ambiguous syntax.
+ 
+        The handling of a backslash followed by a digit other than 0 is compli-
+-       cated, and Perl has changed in recent releases, causing  PCRE  also  to
++       cated,  and  Perl  has changed in recent releases, causing PCRE also to
+        change. Outside a character class, PCRE reads the digit and any follow-
+-       ing digits as a decimal number. If the number is less  than  8,  or  if
+-       there  have been at least that many previous capturing left parentheses
+-       in the expression, the entire sequence is taken as a back reference.  A
+-       description  of how this works is given later, following the discussion
++       ing  digits  as  a  decimal number. If the number is less than 8, or if
++       there have been at least that many previous capturing left  parentheses
++       in  the expression, the entire sequence is taken as a back reference. A
++       description of how this works is given later, following the  discussion
+        of parenthesized subpatterns.
+ 
+-       Inside a character class, or if  the  decimal  number  following  \  is
++       Inside  a  character  class,  or  if  the decimal number following \ is
+        greater than 7 and there have not been that many capturing subpatterns,
+-       PCRE handles \8 and \9 as the literal characters "8" and "9", and  oth-
++       PCRE  handles \8 and \9 as the literal characters "8" and "9", and oth-
+        erwise re-reads up to three octal digits following the backslash, using
+-       them to generate a data character.  Any  subsequent  digits  stand  for
++       them  to  generate  a  data character.  Any subsequent digits stand for
+        themselves. For example:
+ 
+          \040   is another way of writing an ASCII space
+@@ -5071,31 +5095,31 @@ BACKSLASH
+          \81    is either a back reference, or the two
+                    characters "8" and "1"
+ 
+-       Note  that octal values of 100 or greater that are specified using this
+-       syntax must not be introduced by a leading zero, because no  more  than
++       Note that octal values of 100 or greater that are specified using  this
++       syntax  must  not be introduced by a leading zero, because no more than
+        three octal digits are ever read.
+ 
+-       By  default, after \x that is not followed by {, from zero to two hexa-
+-       decimal digits are read (letters can be in upper or  lower  case).  Any
++       By default, after \x that is not followed by {, from zero to two  hexa-
++       decimal  digits  are  read (letters can be in upper or lower case). Any
+        number of hexadecimal digits may appear between \x{ and }. If a charac-
+-       ter other than a hexadecimal digit appears between \x{  and  },  or  if
++       ter  other  than  a  hexadecimal digit appears between \x{ and }, or if
+        there is no terminating }, an error occurs.
+ 
+-       If  the  PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x
+-       is as just described only when it is followed by two  hexadecimal  dig-
+-       its.   Otherwise,  it  matches  a  literal "x" character. In JavaScript
++       If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation  of  \x
++       is  as  just described only when it is followed by two hexadecimal dig-
++       its.  Otherwise, it matches a  literal  "x"  character.  In  JavaScript
+        mode, support for code points greater than 256 is provided by \u, which
+-       must  be  followed  by  four hexadecimal digits; otherwise it matches a
++       must be followed by four hexadecimal digits;  otherwise  it  matches  a
+        literal "u" character.
+ 
+        Characters whose value is less than 256 can be defined by either of the
+-       two  syntaxes for \x (or by \u in JavaScript mode). There is no differ-
++       two syntaxes for \x (or by \u in JavaScript mode). There is no  differ-
+        ence in the way they are handled. For example, \xdc is exactly the same
+        as \x{dc} (or \u00dc in JavaScript mode).
+ 
+    Constraints on character values
+ 
+-       Characters  that  are  specified using octal or hexadecimal numbers are
++       Characters that are specified using octal or  hexadecimal  numbers  are
+        limited to certain values, as follows:
+ 
+          8-bit non-UTF mode    less than 0x100
+@@ -5105,44 +5129,44 @@ BACKSLASH
+          32-bit non-UTF mode   less than 0x100000000
+          32-bit UTF-32 mode    less than 0x10ffff and a valid codepoint
+ 
+-       Invalid Unicode codepoints are the range  0xd800  to  0xdfff  (the  so-
++       Invalid  Unicode  codepoints  are  the  range 0xd800 to 0xdfff (the so-
+        called "surrogate" codepoints), and 0xffef.
+ 
+    Escape sequences in character classes
+ 
+        All the sequences that define a single character value can be used both
+-       inside and outside character classes. In addition, inside  a  character
++       inside  and  outside character classes. In addition, inside a character
+        class, \b is interpreted as the backspace character (hex 08).
+ 
+-       \N  is not allowed in a character class. \B, \R, and \X are not special
+-       inside a character class. Like  other  unrecognized  escape  sequences,
+-       they  are  treated  as  the  literal  characters  "B",  "R", and "X" by
+-       default, but cause an error if the PCRE_EXTRA option is set. Outside  a
++       \N is not allowed in a character class. \B, \R, and \X are not  special
++       inside  a  character  class.  Like other unrecognized escape sequences,
++       they are treated as  the  literal  characters  "B",  "R",  and  "X"  by
++       default,  but cause an error if the PCRE_EXTRA option is set. Outside a
+        character class, these sequences have different meanings.
+ 
+    Unsupported escape sequences
+ 
+-       In  Perl, the sequences \l, \L, \u, and \U are recognized by its string
+-       handler and used  to  modify  the  case  of  following  characters.  By
+-       default,  PCRE does not support these escape sequences. However, if the
+-       PCRE_JAVASCRIPT_COMPAT option is set, \U matches a "U"  character,  and
++       In Perl, the sequences \l, \L, \u, and \U are recognized by its  string
++       handler  and  used  to  modify  the  case  of  following characters. By
++       default, PCRE does not support these escape sequences. However, if  the
++       PCRE_JAVASCRIPT_COMPAT  option  is set, \U matches a "U" character, and
+        \u can be used to define a character by code point, as described in the
+        previous section.
+ 
+    Absolute and relative back references
+ 
+-       The sequence \g followed by an unsigned or a negative  number,  option-
+-       ally  enclosed  in braces, is an absolute or relative back reference. A
++       The  sequence  \g followed by an unsigned or a negative number, option-
++       ally enclosed in braces, is an absolute or relative back  reference.  A
+        named back reference can be coded as \g{name}. Back references are dis-
+        cussed later, following the discussion of parenthesized subpatterns.
+ 
+    Absolute and relative subroutine calls
+ 
+-       For  compatibility with Oniguruma, the non-Perl syntax \g followed by a
++       For compatibility with Oniguruma, the non-Perl syntax \g followed by  a
+        name or a number enclosed either in angle brackets or single quotes, is
+-       an  alternative  syntax for referencing a subpattern as a "subroutine".
+-       Details are discussed later.   Note  that  \g{...}  (Perl  syntax)  and
+-       \g<...>  (Oniguruma  syntax)  are  not synonymous. The former is a back
++       an alternative syntax for referencing a subpattern as  a  "subroutine".
++       Details  are  discussed  later.   Note  that  \g{...} (Perl syntax) and
++       \g<...> (Oniguruma syntax) are not synonymous. The  former  is  a  back
+        reference; the latter is a subroutine call.
+ 
+    Generic character types
+@@ -5161,59 +5185,59 @@ BACKSLASH
+          \W     any "non-word" character
+ 
+        There is also the single sequence \N, which matches a non-newline char-
+-       acter.   This  is the same as the "." metacharacter when PCRE_DOTALL is
+-       not set. Perl also uses \N to match characters by name; PCRE  does  not
++       acter.  This is the same as the "." metacharacter when  PCRE_DOTALL  is
++       not  set.  Perl also uses \N to match characters by name; PCRE does not
+        support this.
+ 
+-       Each  pair of lower and upper case escape sequences partitions the com-
+-       plete set of characters into two disjoint  sets.  Any  given  character
+-       matches  one, and only one, of each pair. The sequences can appear both
+-       inside and outside character classes. They each match one character  of
+-       the  appropriate  type.  If the current matching point is at the end of
+-       the subject string, all of them fail, because there is no character  to
++       Each pair of lower and upper case escape sequences partitions the  com-
++       plete  set  of  characters  into two disjoint sets. Any given character
++       matches one, and only one, of each pair. The sequences can appear  both
++       inside  and outside character classes. They each match one character of
++       the appropriate type. If the current matching point is at  the  end  of
++       the  subject string, all of them fail, because there is no character to
+        match.
+ 
+-       For  compatibility with Perl, \s did not used to match the VT character
+-       (code 11), which made it different from the the  POSIX  "space"  class.
+-       However,  Perl  added  VT  at  release  5.18, and PCRE followed suit at
+-       release 8.34. The default \s characters are now HT  (9),  LF  (10),  VT
+-       (11),  FF  (12),  CR  (13),  and space (32), which are defined as white
++       For compatibility with Perl, \s did not used to match the VT  character
++       (code  11),  which  made  it  different  from  the POSIX "space" class.
++       However, Perl added VT at release  5.18,  and  PCRE  followed  suit  at
++       release  8.34.  The  default  \s characters are now HT (9), LF (10), VT
++       (11), FF (12), CR (13), and space (32),  which  are  defined  as  white
+        space in the "C" locale. This list may vary if locale-specific matching
+-       is  taking place. For example, in some locales the "non-breaking space"
+-       character (\xA0) is recognized as white space, and  in  others  the  VT
++       is taking place. For example, in some locales the "non-breaking  space"
++       character  (\xA0)  is  recognized  as white space, and in others the VT
+        character is not.
+ 
+-       A  "word"  character is an underscore or any character that is a letter
+-       or digit.  By default, the definition of letters  and  digits  is  con-
+-       trolled  by PCRE's low-valued character tables, and may vary if locale-
+-       specific matching is taking place (see "Locale support" in the  pcreapi
+-       page).  For  example,  in  a French locale such as "fr_FR" in Unix-like
+-       systems, or "french" in Windows, some character codes greater than  127
+-       are  used  for  accented letters, and these are then matched by \w. The
++       A "word" character is an underscore or any character that is  a  letter
++       or  digit.   By  default,  the definition of letters and digits is con-
++       trolled by PCRE's low-valued character tables, and may vary if  locale-
++       specific  matching is taking place (see "Locale support" in the pcreapi
++       page). For example, in a French locale such  as  "fr_FR"  in  Unix-like
++       systems,  or "french" in Windows, some character codes greater than 127
++       are used for accented letters, and these are then matched  by  \w.  The
+        use of locales with Unicode is discouraged.
+ 
+-       By default, characters whose code points are  greater  than  127  never
++       By  default,  characters  whose  code points are greater than 127 never
+        match \d, \s, or \w, and always match \D, \S, and \W, although this may
+-       vary for characters in the range 128-255 when locale-specific  matching
+-       is  happening.   These  escape sequences retain their original meanings
+-       from before Unicode support was available, mainly for  efficiency  rea-
+-       sons.  If  PCRE  is  compiled  with  Unicode  property support, and the
+-       PCRE_UCP option is set, the behaviour is changed so that Unicode  prop-
++       vary  for characters in the range 128-255 when locale-specific matching
++       is happening.  These escape sequences retain  their  original  meanings
++       from  before  Unicode support was available, mainly for efficiency rea-
++       sons. If PCRE is  compiled  with  Unicode  property  support,  and  the
++       PCRE_UCP  option is set, the behaviour is changed so that Unicode prop-
+        erties are used to determine character types, as follows:
+ 
+          \d  any character that matches \p{Nd} (decimal digit)
+          \s  any character that matches \p{Z} or \h or \v
+          \w  any character that matches \p{L} or \p{N}, plus underscore
+ 
+-       The  upper case escapes match the inverse sets of characters. Note that
+-       \d matches only decimal digits, whereas \w matches any  Unicode  digit,
+-       as  well as any Unicode letter, and underscore. Note also that PCRE_UCP
+-       affects \b, and \B because they are defined in  terms  of  \w  and  \W.
++       The upper case escapes match the inverse sets of characters. Note  that
++       \d  matches  only decimal digits, whereas \w matches any Unicode digit,
++       as well as any Unicode letter, and underscore. Note also that  PCRE_UCP
++       affects  \b,  and  \B  because  they are defined in terms of \w and \W.
+        Matching these sequences is noticeably slower when PCRE_UCP is set.
+ 
+-       The  sequences  \h, \H, \v, and \V are features that were added to Perl
+-       at release 5.10. In contrast to the other sequences, which  match  only
+-       ASCII  characters  by  default,  these always match certain high-valued
++       The sequences \h, \H, \v, and \V are features that were added  to  Perl
++       at  release  5.10. In contrast to the other sequences, which match only
++       ASCII characters by default, these  always  match  certain  high-valued
+        code points, whether or not PCRE_UCP is set. The horizontal space char-
+        acters are:
+ 
+@@ -5252,110 +5276,110 @@ BACKSLASH
+ 
+    Newline sequences
+ 
+-       Outside a character class, by default, the escape sequence  \R  matches
+-       any  Unicode newline sequence. In 8-bit non-UTF-8 mode \R is equivalent
++       Outside  a  character class, by default, the escape sequence \R matches
++       any Unicode newline sequence. In 8-bit non-UTF-8 mode \R is  equivalent
+        to the following:
+ 
+          (?>\r\n|\n|\x0b|\f|\r|\x85)
+ 
+-       This is an example of an "atomic group", details  of  which  are  given
++       This  is  an  example  of an "atomic group", details of which are given
+        below.  This particular group matches either the two-character sequence
+-       CR followed by LF, or  one  of  the  single  characters  LF  (linefeed,
+-       U+000A),  VT  (vertical  tab, U+000B), FF (form feed, U+000C), CR (car-
+-       riage return, U+000D), or NEL (next line,  U+0085).  The  two-character
++       CR  followed  by  LF,  or  one  of  the single characters LF (linefeed,
++       U+000A), VT (vertical tab, U+000B), FF (form feed,  U+000C),  CR  (car-
++       riage  return,  U+000D),  or NEL (next line, U+0085). The two-character
+        sequence is treated as a single unit that cannot be split.
+ 
+-       In  other modes, two additional characters whose codepoints are greater
++       In other modes, two additional characters whose codepoints are  greater
+        than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
+-       rator,  U+2029).   Unicode character property support is not needed for
++       rator, U+2029).  Unicode character property support is not  needed  for
+        these characters to be recognized.
+ 
+        It is possible to restrict \R to match only CR, LF, or CRLF (instead of
+-       the  complete  set  of  Unicode  line  endings)  by  setting the option
++       the complete set  of  Unicode  line  endings)  by  setting  the  option
+        PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
+        (BSR is an abbrevation for "backslash R".) This can be made the default
+-       when PCRE is built; if this is the case, the  other  behaviour  can  be
+-       requested  via  the  PCRE_BSR_UNICODE  option.   It is also possible to
+-       specify these settings by starting a pattern string  with  one  of  the
++       when  PCRE  is  built;  if this is the case, the other behaviour can be
++       requested via the PCRE_BSR_UNICODE option.   It  is  also  possible  to
++       specify  these  settings  by  starting a pattern string with one of the
+        following sequences:
+ 
+          (*BSR_ANYCRLF)   CR, LF, or CRLF only
+          (*BSR_UNICODE)   any Unicode newline sequence
+ 
+        These override the default and the options given to the compiling func-
+-       tion, but they can themselves be  overridden  by  options  given  to  a
+-       matching  function.  Note  that  these  special settings, which are not
+-       Perl-compatible, are recognized only at the very start  of  a  pattern,
+-       and  that  they  must  be  in  upper  case. If more than one of them is
+-       present, the last one is used. They can be combined with  a  change  of
++       tion,  but  they  can  themselves  be  overridden by options given to a
++       matching function. Note that these  special  settings,  which  are  not
++       Perl-compatible,  are  recognized  only at the very start of a pattern,
++       and that they must be in upper case.  If  more  than  one  of  them  is
++       present,  the  last  one is used. They can be combined with a change of
+        newline convention; for example, a pattern can start with:
+ 
+          (*ANY)(*BSR_ANYCRLF)
+ 
+-       They  can also be combined with the (*UTF8), (*UTF16), (*UTF32), (*UTF)
++       They can also be combined with the (*UTF8), (*UTF16), (*UTF32),  (*UTF)
+        or (*UCP) special sequences. Inside a character class, \R is treated as
+-       an  unrecognized  escape  sequence,  and  so  matches the letter "R" by
++       an unrecognized escape sequence, and  so  matches  the  letter  "R"  by
+        default, but causes an error if PCRE_EXTRA is set.
+ 
+    Unicode character properties
+ 
+        When PCRE is built with Unicode character property support, three addi-
+-       tional  escape sequences that match characters with specific properties
+-       are available.  When in 8-bit non-UTF-8 mode, these  sequences  are  of
+-       course  limited  to  testing  characters whose codepoints are less than
++       tional escape sequences that match characters with specific  properties
++       are  available.   When  in 8-bit non-UTF-8 mode, these sequences are of
++       course limited to testing characters whose  codepoints  are  less  than
+        256, but they do work in this mode.  The extra escape sequences are:
+ 
+          \p{xx}   a character with the xx property
+          \P{xx}   a character without the xx property
+          \X       a Unicode extended grapheme cluster
+ 
+-       The property names represented by xx above are limited to  the  Unicode
++       The  property  names represented by xx above are limited to the Unicode
+        script names, the general category properties, "Any", which matches any
+-       character  (including  newline),  and  some  special  PCRE   properties
+-       (described  in the next section).  Other Perl properties such as "InMu-
+-       sicalSymbols" are not currently supported by PCRE.  Note  that  \P{Any}
++       character   (including  newline),  and  some  special  PCRE  properties
++       (described in the next section).  Other Perl properties such as  "InMu-
++       sicalSymbols"  are  not  currently supported by PCRE. Note that \P{Any}
+        does not match any characters, so always causes a match failure.
+ 
+        Sets of Unicode characters are defined as belonging to certain scripts.
+-       A character from one of these sets can be matched using a script  name.
++       A  character from one of these sets can be matched using a script name.
+        For example:
+ 
+          \p{Greek}
+          \P{Han}
+ 
+-       Those  that are not part of an identified script are lumped together as
++       Those that are not part of an identified script are lumped together  as
+        "Common". The current list of scripts is:
+ 
+-       Arabic, Armenian, Avestan, Balinese, Bamum, Bassa_Vah, Batak,  Bengali,
+-       Bopomofo,  Brahmi,  Braille, Buginese, Buhid, Canadian_Aboriginal, Car-
++       Arabic,  Armenian, Avestan, Balinese, Bamum, Bassa_Vah, Batak, Bengali,
++       Bopomofo, Brahmi, Braille, Buginese, Buhid,  Canadian_Aboriginal,  Car-
+        ian, Caucasian_Albanian, Chakma, Cham, Cherokee, Common, Coptic, Cunei-
+        form, Cypriot, Cyrillic, Deseret, Devanagari, Duployan, Egyptian_Hiero-
+        glyphs,  Elbasan,  Ethiopic,  Georgian,  Glagolitic,  Gothic,  Grantha,
+-       Greek,  Gujarati,  Gurmukhi,  Han,  Hangul,  Hanunoo, Hebrew, Hiragana,
+-       Imperial_Aramaic,    Inherited,     Inscriptional_Pahlavi,     Inscrip-
+-       tional_Parthian,   Javanese,   Kaithi,   Kannada,  Katakana,  Kayah_Li,
+-       Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha,  Limbu,  Lin-
+-       ear_A,  Linear_B,  Lisu,  Lycian, Lydian, Mahajani, Malayalam, Mandaic,
+-       Manichaean,     Meetei_Mayek,     Mende_Kikakui,      Meroitic_Cursive,
+-       Meroitic_Hieroglyphs,  Miao,  Modi, Mongolian, Mro, Myanmar, Nabataean,
+-       New_Tai_Lue,  Nko,  Ogham,  Ol_Chiki,  Old_Italic,   Old_North_Arabian,
++       Greek, Gujarati, Gurmukhi,  Han,  Hangul,  Hanunoo,  Hebrew,  Hiragana,
++       Imperial_Aramaic,     Inherited,     Inscriptional_Pahlavi,    Inscrip-
++       tional_Parthian,  Javanese,  Kaithi,   Kannada,   Katakana,   Kayah_Li,
++       Kharoshthi,  Khmer,  Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Lin-
++       ear_A, Linear_B, Lisu, Lycian, Lydian,  Mahajani,  Malayalam,  Mandaic,
++       Manichaean,      Meetei_Mayek,     Mende_Kikakui,     Meroitic_Cursive,
++       Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro,  Myanmar,  Nabataean,
++       New_Tai_Lue,   Nko,  Ogham,  Ol_Chiki,  Old_Italic,  Old_North_Arabian,
+        Old_Permic, Old_Persian, Old_South_Arabian, Old_Turkic, Oriya, Osmanya,
+        Pahawh_Hmong,    Palmyrene,    Pau_Cin_Hau,    Phags_Pa,    Phoenician,
+-       Psalter_Pahlavi,  Rejang,  Runic,  Samaritan, Saurashtra, Sharada, Sha-
+-       vian, Siddham, Sinhala, Sora_Sompeng, Sundanese, Syloti_Nagri,  Syriac,
+-       Tagalog,  Tagbanwa,  Tai_Le,  Tai_Tham, Tai_Viet, Takri, Tamil, Telugu,
+-       Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Ugaritic,  Vai,  Warang_Citi,
++       Psalter_Pahlavi, Rejang, Runic, Samaritan,  Saurashtra,  Sharada,  Sha-
++       vian,  Siddham, Sinhala, Sora_Sompeng, Sundanese, Syloti_Nagri, Syriac,
++       Tagalog, Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet,  Takri,  Tamil,  Telugu,
++       Thaana,  Thai,  Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, Warang_Citi,
+        Yi.
+ 
+        Each character has exactly one Unicode general category property, spec-
+-       ified by a two-letter abbreviation. For compatibility with Perl,  nega-
+-       tion  can  be  specified  by including a circumflex between the opening
+-       brace and the property name.  For  example,  \p{^Lu}  is  the  same  as
++       ified  by a two-letter abbreviation. For compatibility with Perl, nega-
++       tion can be specified by including a  circumflex  between  the  opening
++       brace  and  the  property  name.  For  example,  \p{^Lu} is the same as
+        \P{Lu}.
+ 
+        If only one letter is specified with \p or \P, it includes all the gen-
+-       eral category properties that start with that letter. In this case,  in
+-       the  absence of negation, the curly brackets in the escape sequence are
++       eral  category properties that start with that letter. In this case, in
++       the absence of negation, the curly brackets in the escape sequence  are
+        optional; these two examples have the same effect:
+ 
+          \p{L}
+@@ -5407,73 +5431,73 @@ BACKSLASH
+          Zp    Paragraph separator
+          Zs    Space separator
+ 
+-       The special property L& is also supported: it matches a character  that
+-       has  the  Lu,  Ll, or Lt property, in other words, a letter that is not
++       The  special property L& is also supported: it matches a character that
++       has the Lu, Ll, or Lt property, in other words, a letter  that  is  not
+        classified as a modifier or "other".
+ 
+-       The Cs (Surrogate) property applies only to  characters  in  the  range
+-       U+D800  to U+DFFF. Such characters are not valid in Unicode strings and
+-       so cannot be tested by PCRE, unless  UTF  validity  checking  has  been
++       The  Cs  (Surrogate)  property  applies only to characters in the range
++       U+D800 to U+DFFF. Such characters are not valid in Unicode strings  and
++       so  cannot  be  tested  by  PCRE, unless UTF validity checking has been
+        turned    off    (see    the    discussion    of    PCRE_NO_UTF8_CHECK,
+-       PCRE_NO_UTF16_CHECK and PCRE_NO_UTF32_CHECK in the pcreapi page).  Perl
++       PCRE_NO_UTF16_CHECK  and PCRE_NO_UTF32_CHECK in the pcreapi page). Perl
+        does not support the Cs property.
+ 
+-       The  long  synonyms  for  property  names  that  Perl supports (such as
+-       \p{Letter}) are not supported by PCRE, nor is it  permitted  to  prefix
++       The long synonyms for  property  names  that  Perl  supports  (such  as
++       \p{Letter})  are  not  supported by PCRE, nor is it permitted to prefix
+        any of these properties with "Is".
+ 
+        No character that is in the Unicode table has the Cn (unassigned) prop-
+        erty.  Instead, this property is assumed for any code point that is not
+        in the Unicode table.
+ 
+-       Specifying  caseless  matching  does not affect these escape sequences.
+-       For example, \p{Lu} always matches only upper  case  letters.  This  is
++       Specifying caseless matching does not affect  these  escape  sequences.
++       For  example,  \p{Lu}  always  matches only upper case letters. This is
+        different from the behaviour of current versions of Perl.
+ 
+-       Matching  characters  by Unicode property is not fast, because PCRE has
+-       to do a multistage table lookup in order to find  a  character's  prop-
++       Matching characters by Unicode property is not fast, because  PCRE  has
++       to  do  a  multistage table lookup in order to find a character's prop-
+        erty. That is why the traditional escape sequences such as \d and \w do
+        not use Unicode properties in PCRE by default, though you can make them
+-       do  so  by  setting the PCRE_UCP option or by starting the pattern with
++       do so by setting the PCRE_UCP option or by starting  the  pattern  with
+        (*UCP).
+ 
+    Extended grapheme clusters
+ 
+-       The \X escape matches any number of Unicode  characters  that  form  an
++       The  \X  escape  matches  any number of Unicode characters that form an
+        "extended grapheme cluster", and treats the sequence as an atomic group
+-       (see below).  Up to and including release 8.31, PCRE  matched  an  ear-
++       (see  below).   Up  to and including release 8.31, PCRE matched an ear-
+        lier, simpler definition that was equivalent to
+ 
+          (?>\PM\pM*)
+ 
+-       That  is,  it matched a character without the "mark" property, followed
+-       by zero or more characters with the "mark"  property.  Characters  with
+-       the  "mark"  property are typically non-spacing accents that affect the
++       That is, it matched a character without the "mark"  property,  followed
++       by  zero  or  more characters with the "mark" property. Characters with
++       the "mark" property are typically non-spacing accents that  affect  the
+        preceding character.
+ 
+-       This simple definition was extended in Unicode to include more  compli-
+-       cated  kinds of composite character by giving each character a grapheme
+-       breaking property, and creating rules  that  use  these  properties  to
+-       define  the  boundaries  of  extended grapheme clusters. In releases of
++       This  simple definition was extended in Unicode to include more compli-
++       cated kinds of composite character by giving each character a  grapheme
++       breaking  property,  and  creating  rules  that use these properties to
++       define the boundaries of extended grapheme  clusters.  In  releases  of
+        PCRE later than 8.31, \X matches one of these clusters.
+ 
+-       \X always matches at least one character. Then it  decides  whether  to
++       \X  always  matches  at least one character. Then it decides whether to
+        add additional characters according to the following rules for ending a
+        cluster:
+ 
+        1. End at the end of the subject string.
+ 
+-       2. Do not end between CR and LF; otherwise end after any control  char-
++       2.  Do not end between CR and LF; otherwise end after any control char-
+        acter.
+ 
+-       3.  Do  not  break  Hangul (a Korean script) syllable sequences. Hangul
+-       characters are of five types: L, V, T, LV, and LVT. An L character  may
+-       be  followed by an L, V, LV, or LVT character; an LV or V character may
++       3. Do not break Hangul (a Korean  script)  syllable  sequences.  Hangul
++       characters  are of five types: L, V, T, LV, and LVT. An L character may
++       be followed by an L, V, LV, or LVT character; an LV or V character  may
+        be followed by a V or T character; an LVT or T character may be follwed
+        only by a T character.
+ 
+-       4.  Do not end before extending characters or spacing marks. Characters
+-       with the "mark" property always have  the  "extend"  grapheme  breaking
++       4. Do not end before extending characters or spacing marks.  Characters
++       with  the  "mark"  property  always have the "extend" grapheme breaking
+        property.
+ 
+        5. Do not end after prepend characters.
+@@ -5482,9 +5506,9 @@ BACKSLASH
+ 
+    PCRE's additional properties
+ 
+-       As  well  as the standard Unicode properties described above, PCRE sup-
+-       ports four more that make it possible  to  convert  traditional  escape
+-       sequences  such as \w and \s to use Unicode properties. PCRE uses these
++       As well as the standard Unicode properties described above,  PCRE  sup-
++       ports  four  more  that  make it possible to convert traditional escape
++       sequences such as \w and \s to use Unicode properties. PCRE uses  these
+        non-standard, non-Perl properties internally when PCRE_UCP is set. How-
+        ever, they may also be used explicitly. These properties are:
+ 
+@@ -5493,54 +5517,54 @@ BACKSLASH
+          Xsp   Any Perl space character
+          Xwd   Any Perl "word" character
+ 
+-       Xan  matches  characters that have either the L (letter) or the N (num-
+-       ber) property. Xps matches the characters tab, linefeed, vertical  tab,
+-       form  feed,  or carriage return, and any other character that has the Z
+-       (separator) property.  Xsp is the same as Xps; it used to exclude  ver-
+-       tical  tab,  for Perl compatibility, but Perl changed, and so PCRE fol-
+-       lowed at release 8.34. Xwd matches the same  characters  as  Xan,  plus
++       Xan matches characters that have either the L (letter) or the  N  (num-
++       ber)  property. Xps matches the characters tab, linefeed, vertical tab,
++       form feed, or carriage return, and any other character that has  the  Z
++       (separator)  property.  Xsp is the same as Xps; it used to exclude ver-
++       tical tab, for Perl compatibility, but Perl changed, and so  PCRE  fol-
++       lowed  at  release  8.34.  Xwd matches the same characters as Xan, plus
+        underscore.
+ 
+-       There  is another non-standard property, Xuc, which matches any charac-
+-       ter that can be represented by a Universal Character Name  in  C++  and
+-       other  programming  languages.  These are the characters $, @, ` (grave
+-       accent), and all characters with Unicode code points  greater  than  or
+-       equal  to U+00A0, except for the surrogates U+D800 to U+DFFF. Note that
+-       most base (ASCII) characters are excluded. (Universal  Character  Names
+-       are  of  the  form \uHHHH or \UHHHHHHHH where H is a hexadecimal digit.
++       There is another non-standard property, Xuc, which matches any  charac-
++       ter  that  can  be represented by a Universal Character Name in C++ and
++       other programming languages. These are the characters $,  @,  `  (grave
++       accent),  and  all  characters with Unicode code points greater than or
++       equal to U+00A0, except for the surrogates U+D800 to U+DFFF. Note  that
++       most  base  (ASCII) characters are excluded. (Universal Character Names
++       are of the form \uHHHH or \UHHHHHHHH where H is  a  hexadecimal  digit.
+        Note that the Xuc property does not match these sequences but the char-
+        acters that they represent.)
+ 
+    Resetting the match start
+ 
+-       The  escape sequence \K causes any previously matched characters not to
++       The escape sequence \K causes any previously matched characters not  to
+        be included in the final matched sequence. For example, the pattern:
+ 
+          foo\Kbar
+ 
+-       matches "foobar", but reports that it has matched "bar".  This  feature
+-       is  similar  to  a lookbehind assertion (described below).  However, in
+-       this case, the part of the subject before the real match does not  have
+-       to  be of fixed length, as lookbehind assertions do. The use of \K does
+-       not interfere with the setting of captured  substrings.   For  example,
++       matches  "foobar",  but reports that it has matched "bar". This feature
++       is similar to a lookbehind assertion (described  below).   However,  in
++       this  case, the part of the subject before the real match does not have
++       to be of fixed length, as lookbehind assertions do. The use of \K  does
++       not  interfere  with  the setting of captured substrings.  For example,
+        when the pattern
+ 
+          (foo)\Kbar
+ 
+        matches "foobar", the first substring is still set to "foo".
+ 
+-       Perl  documents  that  the  use  of  \K  within assertions is "not well
+-       defined". In PCRE, \K is acted upon  when  it  occurs  inside  positive
+-       assertions,  but  is  ignored  in negative assertions. Note that when a
+-       pattern such as (?=ab\K) matches, the reported start of the  match  can
++       Perl documents that the use  of  \K  within  assertions  is  "not  well
++       defined".  In  PCRE,  \K  is  acted upon when it occurs inside positive
++       assertions, but is ignored in negative assertions.  Note  that  when  a
++       pattern  such  as (?=ab\K) matches, the reported start of the match can
+        be greater than the end of the match.
+ 
+    Simple assertions
+ 
+-       The  final use of backslash is for certain simple assertions. An asser-
+-       tion specifies a condition that has to be met at a particular point  in
+-       a  match, without consuming any characters from the subject string. The
+-       use of subpatterns for more complicated assertions is described  below.
++       The final use of backslash is for certain simple assertions. An  asser-
++       tion  specifies a condition that has to be met at a particular point in
++       a match, without consuming any characters from the subject string.  The
++       use  of subpatterns for more complicated assertions is described below.
+        The backslashed assertions are:
+ 
+          \b     matches at a word boundary
+@@ -5551,161 +5575,161 @@ BACKSLASH
+          \z     matches only at the end of the subject
+          \G     matches at the first matching position in the subject
+ 
+-       Inside  a  character  class, \b has a different meaning; it matches the
+-       backspace character. If any other of  these  assertions  appears  in  a
+-       character  class, by default it matches the corresponding literal char-
++       Inside a character class, \b has a different meaning;  it  matches  the
++       backspace  character.  If  any  other  of these assertions appears in a
++       character class, by default it matches the corresponding literal  char-
+        acter  (for  example,  \B  matches  the  letter  B).  However,  if  the
+-       PCRE_EXTRA  option is set, an "invalid escape sequence" error is gener-
++       PCRE_EXTRA option is set, an "invalid escape sequence" error is  gener-
+        ated instead.
+ 
+-       A word boundary is a position in the subject string where  the  current
+-       character  and  the previous character do not both match \w or \W (i.e.
+-       one matches \w and the other matches \W), or the start or  end  of  the
+-       string  if  the  first or last character matches \w, respectively. In a
+-       UTF mode, the meanings of \w and \W  can  be  changed  by  setting  the
+-       PCRE_UCP  option. When this is done, it also affects \b and \B. Neither
+-       PCRE nor Perl has a separate "start of word" or "end of  word"  metase-
+-       quence.  However,  whatever follows \b normally determines which it is.
++       A  word  boundary is a position in the subject string where the current
++       character and the previous character do not both match \w or  \W  (i.e.
++       one  matches  \w  and the other matches \W), or the start or end of the
++       string if the first or last character matches \w,  respectively.  In  a
++       UTF  mode,  the  meanings  of  \w  and \W can be changed by setting the
++       PCRE_UCP option. When this is done, it also affects \b and \B.  Neither
++       PCRE  nor  Perl has a separate "start of word" or "end of word" metase-
++       quence. However, whatever follows \b normally determines which  it  is.
+        For example, the fragment \ba matches "a" at the start of a word.
+ 
+-       The \A, \Z, and \z assertions differ from  the  traditional  circumflex
++       The  \A,  \Z,  and \z assertions differ from the traditional circumflex
+        and dollar (described in the next section) in that they only ever match
+-       at the very start and end of the subject string, whatever  options  are
+-       set.  Thus,  they are independent of multiline mode. These three asser-
++       at  the  very start and end of the subject string, whatever options are
++       set. Thus, they are independent of multiline mode. These  three  asser-
+        tions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options, which
+-       affect  only the behaviour of the circumflex and dollar metacharacters.
+-       However, if the startoffset argument of pcre_exec() is non-zero,  indi-
++       affect only the behaviour of the circumflex and dollar  metacharacters.
++       However,  if the startoffset argument of pcre_exec() is non-zero, indi-
+        cating that matching is to start at a point other than the beginning of
+-       the subject, \A can never match. The difference between \Z  and  \z  is
++       the  subject,  \A  can never match. The difference between \Z and \z is
+        that \Z matches before a newline at the end of the string as well as at
+        the very end, whereas \z matches only at the end.
+ 
+-       The \G assertion is true only when the current matching position is  at
+-       the  start point of the match, as specified by the startoffset argument
+-       of pcre_exec(). It differs from \A when the  value  of  startoffset  is
+-       non-zero.  By calling pcre_exec() multiple times with appropriate argu-
++       The  \G assertion is true only when the current matching position is at
++       the start point of the match, as specified by the startoffset  argument
++       of  pcre_exec().  It  differs  from \A when the value of startoffset is
++       non-zero. By calling pcre_exec() multiple times with appropriate  argu-
+        ments, you can mimic Perl's /g option, and it is in this kind of imple-
+        mentation where \G can be useful.
+ 
+-       Note,  however,  that  PCRE's interpretation of \G, as the start of the
++       Note, however, that PCRE's interpretation of \G, as the  start  of  the
+        current match, is subtly different from Perl's, which defines it as the
+-       end  of  the  previous  match. In Perl, these can be different when the
+-       previously matched string was empty. Because PCRE does just  one  match
++       end of the previous match. In Perl, these can  be  different  when  the
++       previously  matched  string was empty. Because PCRE does just one match
+        at a time, it cannot reproduce this behaviour.
+ 
+-       If  all  the alternatives of a pattern begin with \G, the expression is
++       If all the alternatives of a pattern begin with \G, the  expression  is
+        anchored to the starting match position, and the "anchored" flag is set
+        in the compiled regular expression.
+ 
+ 
+ CIRCUMFLEX AND DOLLAR
+ 
+-       The  circumflex  and  dollar  metacharacters are zero-width assertions.
+-       That is, they test for a particular condition being true  without  con-
++       The circumflex and dollar  metacharacters  are  zero-width  assertions.
++       That  is,  they test for a particular condition being true without con-
+        suming any characters from the subject string.
+ 
+        Outside a character class, in the default matching mode, the circumflex
+-       character is an assertion that is true only  if  the  current  matching
+-       point  is  at the start of the subject string. If the startoffset argu-
+-       ment of pcre_exec() is non-zero, circumflex  can  never  match  if  the
+-       PCRE_MULTILINE  option  is  unset. Inside a character class, circumflex
++       character  is  an  assertion  that is true only if the current matching
++       point is at the start of the subject string. If the  startoffset  argu-
++       ment  of  pcre_exec()  is  non-zero,  circumflex can never match if the
++       PCRE_MULTILINE option is unset. Inside a  character  class,  circumflex
+        has an entirely different meaning (see below).
+ 
+-       Circumflex need not be the first character of the pattern if  a  number
+-       of  alternatives are involved, but it should be the first thing in each
+-       alternative in which it appears if the pattern is ever  to  match  that
+-       branch.  If all possible alternatives start with a circumflex, that is,
+-       if the pattern is constrained to match only at the start  of  the  sub-
+-       ject,  it  is  said  to be an "anchored" pattern. (There are also other
++       Circumflex  need  not be the first character of the pattern if a number
++       of alternatives are involved, but it should be the first thing in  each
++       alternative  in  which  it appears if the pattern is ever to match that
++       branch. If all possible alternatives start with a circumflex, that  is,
++       if  the  pattern  is constrained to match only at the start of the sub-
++       ject, it is said to be an "anchored" pattern.  (There  are  also  other
+        constructs that can cause a pattern to be anchored.)
+ 
+-       The dollar character is an assertion that is true only if  the  current
+-       matching  point  is  at  the  end of the subject string, or immediately
+-       before a newline at the end of the string (by default). Note,  however,
+-       that  it  does  not  actually match the newline. Dollar need not be the
++       The  dollar  character is an assertion that is true only if the current
++       matching point is at the end of  the  subject  string,  or  immediately
++       before  a newline at the end of the string (by default). Note, however,
++       that it does not actually match the newline. Dollar  need  not  be  the
+        last character of the pattern if a number of alternatives are involved,
+-       but  it should be the last item in any branch in which it appears. Dol-
++       but it should be the last item in any branch in which it appears.  Dol-
+        lar has no special meaning in a character class.
+ 
+-       The meaning of dollar can be changed so that it  matches  only  at  the
+-       very  end  of  the string, by setting the PCRE_DOLLAR_ENDONLY option at
++       The  meaning  of  dollar  can be changed so that it matches only at the
++       very end of the string, by setting the  PCRE_DOLLAR_ENDONLY  option  at
+        compile time. This does not affect the \Z assertion.
+ 
+        The meanings of the circumflex and dollar characters are changed if the
+-       PCRE_MULTILINE  option  is  set.  When  this  is the case, a circumflex
+-       matches immediately after internal newlines as well as at the start  of
+-       the  subject  string.  It  does not match after a newline that ends the
+-       string. A dollar matches before any newlines in the string, as well  as
+-       at  the very end, when PCRE_MULTILINE is set. When newline is specified
+-       as the two-character sequence CRLF, isolated CR and  LF  characters  do
++       PCRE_MULTILINE option is set. When  this  is  the  case,  a  circumflex
++       matches  immediately after internal newlines as well as at the start of
++       the subject string. It does not match after a  newline  that  ends  the
++       string.  A dollar matches before any newlines in the string, as well as
++       at the very end, when PCRE_MULTILINE is set. When newline is  specified
++       as  the  two-character  sequence CRLF, isolated CR and LF characters do
+        not indicate newlines.
+ 
+-       For  example, the pattern /^abc$/ matches the subject string "def\nabc"
+-       (where \n represents a newline) in multiline mode, but  not  otherwise.
+-       Consequently,  patterns  that  are anchored in single line mode because
+-       all branches start with ^ are not anchored in  multiline  mode,  and  a
+-       match  for  circumflex  is  possible  when  the startoffset argument of
+-       pcre_exec() is non-zero. The PCRE_DOLLAR_ENDONLY option is  ignored  if
++       For example, the pattern /^abc$/ matches the subject string  "def\nabc"
++       (where  \n  represents a newline) in multiline mode, but not otherwise.
++       Consequently, patterns that are anchored in single  line  mode  because
++       all  branches  start  with  ^ are not anchored in multiline mode, and a
++       match for circumflex is  possible  when  the  startoffset  argument  of
++       pcre_exec()  is  non-zero. The PCRE_DOLLAR_ENDONLY option is ignored if
+        PCRE_MULTILINE is set.
+ 
+-       Note  that  the sequences \A, \Z, and \z can be used to match the start
+-       and end of the subject in both modes, and if all branches of a  pattern
+-       start  with  \A it is always anchored, whether or not PCRE_MULTILINE is
++       Note that the sequences \A, \Z, and \z can be used to match  the  start
++       and  end of the subject in both modes, and if all branches of a pattern
++       start with \A it is always anchored, whether or not  PCRE_MULTILINE  is
+        set.
+ 
+ 
+ FULL STOP (PERIOD, DOT) AND \N
+ 
+        Outside a character class, a dot in the pattern matches any one charac-
+-       ter  in  the subject string except (by default) a character that signi-
++       ter in the subject string except (by default) a character  that  signi-
+        fies the end of a line.
+ 
+-       When a line ending is defined as a single character, dot never  matches
+-       that  character; when the two-character sequence CRLF is used, dot does
+-       not match CR if it is immediately followed  by  LF,  but  otherwise  it
+-       matches  all characters (including isolated CRs and LFs). When any Uni-
+-       code line endings are being recognized, dot does not match CR or LF  or
++       When  a line ending is defined as a single character, dot never matches
++       that character; when the two-character sequence CRLF is used, dot  does
++       not  match  CR  if  it  is immediately followed by LF, but otherwise it
++       matches all characters (including isolated CRs and LFs). When any  Uni-
++       code  line endings are being recognized, dot does not match CR or LF or
+        any of the other line ending characters.
+ 
+-       The  behaviour  of  dot  with regard to newlines can be changed. If the
+-       PCRE_DOTALL option is set, a dot matches  any  one  character,  without
++       The behaviour of dot with regard to newlines can  be  changed.  If  the
++       PCRE_DOTALL  option  is  set,  a dot matches any one character, without
+        exception. If the two-character sequence CRLF is present in the subject
+        string, it takes two dots to match it.
+ 
+-       The handling of dot is entirely independent of the handling of  circum-
+-       flex  and  dollar,  the  only relationship being that they both involve
++       The  handling of dot is entirely independent of the handling of circum-
++       flex and dollar, the only relationship being  that  they  both  involve
+        newlines. Dot has no special meaning in a character class.
+ 
+-       The escape sequence \N behaves like  a  dot,  except  that  it  is  not
+-       affected  by  the  PCRE_DOTALL  option.  In other words, it matches any
+-       character except one that signifies the end of a line. Perl  also  uses
++       The  escape  sequence  \N  behaves  like  a  dot, except that it is not
++       affected by the PCRE_DOTALL option. In  other  words,  it  matches  any
++       character  except  one that signifies the end of a line. Perl also uses
+        \N to match characters by name; PCRE does not support this.
+ 
+ 
+ MATCHING A SINGLE DATA UNIT
+ 
+-       Outside  a character class, the escape sequence \C matches any one data
+-       unit, whether or not a UTF mode is set. In the 8-bit library, one  data
+-       unit  is  one  byte;  in the 16-bit library it is a 16-bit unit; in the
+-       32-bit library it is a 32-bit unit. Unlike a  dot,  \C  always  matches
+-       line-ending  characters.  The  feature  is provided in Perl in order to
++       Outside a character class, the escape sequence \C matches any one  data
++       unit,  whether or not a UTF mode is set. In the 8-bit library, one data
++       unit is one byte; in the 16-bit library it is a  16-bit  unit;  in  the
++       32-bit  library  it  is  a 32-bit unit. Unlike a dot, \C always matches
++       line-ending characters. The feature is provided in  Perl  in  order  to
+        match individual bytes in UTF-8 mode, but it is unclear how it can use-
+-       fully  be  used.  Because  \C breaks up characters into individual data
+-       units, matching one unit with \C in a UTF mode means that the  rest  of
++       fully be used. Because \C breaks up  characters  into  individual  data
++       units,  matching  one unit with \C in a UTF mode means that the rest of
+        the string may start with a malformed UTF character. This has undefined
+        results, because PCRE assumes that it is dealing with valid UTF strings
+-       (and  by  default  it checks this at the start of processing unless the
+-       PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK or  PCRE_NO_UTF32_CHECK  option
++       (and by default it checks this at the start of  processing  unless  the
++       PCRE_NO_UTF8_CHECK,  PCRE_NO_UTF16_CHECK  or PCRE_NO_UTF32_CHECK option
+        is used).
+ 
+-       PCRE  does  not  allow \C to appear in lookbehind assertions (described
+-       below) in a UTF mode, because this would make it impossible  to  calcu-
++       PCRE does not allow \C to appear in  lookbehind  assertions  (described
++       below)  in  a UTF mode, because this would make it impossible to calcu-
+        late the length of the lookbehind.
+ 
+        In general, the \C escape sequence is best avoided. However, one way of
+-       using it that avoids the problem of malformed UTF characters is to  use
+-       a  lookahead to check the length of the next character, as in this pat-
+-       tern, which could be used with a UTF-8 string (ignore white  space  and
++       using  it that avoids the problem of malformed UTF characters is to use
++       a lookahead to check the length of the next character, as in this  pat-
++       tern,  which  could be used with a UTF-8 string (ignore white space and
+        line breaks):
+ 
+          (?| (?=[\x00-\x7f])(\C) |
+@@ -5713,11 +5737,11 @@ MATCHING A SINGLE DATA UNIT
+              (?=[\x{800}-\x{ffff}])(\C)(\C)(\C) |
+              (?=[\x{10000}-\x{1fffff}])(\C)(\C)(\C)(\C))
+ 
+-       A  group  that starts with (?| resets the capturing parentheses numbers
+-       in each alternative (see "Duplicate  Subpattern  Numbers"  below).  The
+-       assertions  at  the start of each branch check the next UTF-8 character
+-       for values whose encoding uses 1, 2, 3, or 4 bytes,  respectively.  The
+-       character's  individual bytes are then captured by the appropriate num-
++       A group that starts with (?| resets the capturing  parentheses  numbers
++       in  each  alternative  (see  "Duplicate Subpattern Numbers" below). The
++       assertions at the start of each branch check the next  UTF-8  character
++       for  values  whose encoding uses 1, 2, 3, or 4 bytes, respectively. The
++       character's individual bytes are then captured by the appropriate  num-
+        ber of groups.
+ 
+ 
+@@ -5727,109 +5751,109 @@ SQUARE BRACKETS AND CHARACTER CLASSES
+        closing square bracket. A closing square bracket on its own is not spe-
+        cial by default.  However, if the PCRE_JAVASCRIPT_COMPAT option is set,
+        a lone closing square bracket causes a compile-time error. If a closing
+-       square bracket is required as a member of the class, it should  be  the
+-       first  data  character  in  the  class (after an initial circumflex, if
++       square  bracket  is required as a member of the class, it should be the
++       first data character in the class  (after  an  initial  circumflex,  if
+        present) or escaped with a backslash.
+ 
+-       A character class matches a single character in the subject. In  a  UTF
+-       mode,  the  character  may  be  more than one data unit long. A matched
++       A  character  class matches a single character in the subject. In a UTF
++       mode, the character may be more than one  data  unit  long.  A  matched
+        character must be in the set of characters defined by the class, unless
+-       the  first  character in the class definition is a circumflex, in which
++       the first character in the class definition is a circumflex,  in  which
+        case the subject character must not be in the set defined by the class.
+-       If  a  circumflex is actually required as a member of the class, ensure
++       If a circumflex is actually required as a member of the  class,  ensure
+        it is not the first character, or escape it with a backslash.
+ 
+-       For example, the character class [aeiou] matches any lower case  vowel,
+-       while  [^aeiou]  matches  any character that is not a lower case vowel.
++       For  example, the character class [aeiou] matches any lower case vowel,
++       while [^aeiou] matches any character that is not a  lower  case  vowel.
+        Note that a circumflex is just a convenient notation for specifying the
+-       characters  that  are in the class by enumerating those that are not. A
+-       class that starts with a circumflex is not an assertion; it still  con-
+-       sumes  a  character  from the subject string, and therefore it fails if
++       characters that are in the class by enumerating those that are  not.  A
++       class  that starts with a circumflex is not an assertion; it still con-
++       sumes a character from the subject string, and therefore  it  fails  if
+        the current pointer is at the end of the string.
+ 
+        In UTF-8 (UTF-16, UTF-32) mode, characters with values greater than 255
+-       (0xffff)  can be included in a class as a literal string of data units,
++       (0xffff) can be included in a class as a literal string of data  units,
+        or by using the \x{ escaping mechanism.
+ 
+-       When caseless matching is set, any letters in a  class  represent  both
+-       their  upper  case  and lower case versions, so for example, a caseless
+-       [aeiou] matches "A" as well as "a", and a caseless  [^aeiou]  does  not
+-       match  "A", whereas a caseful version would. In a UTF mode, PCRE always
+-       understands the concept of case for characters whose  values  are  less
+-       than  128, so caseless matching is always possible. For characters with
+-       higher values, the concept of case is supported  if  PCRE  is  compiled
+-       with  Unicode  property support, but not otherwise.  If you want to use
+-       caseless matching in a UTF mode for characters 128 and above, you  must
+-       ensure  that  PCRE is compiled with Unicode property support as well as
++       When  caseless  matching  is set, any letters in a class represent both
++       their upper case and lower case versions, so for  example,  a  caseless
++       [aeiou]  matches  "A"  as well as "a", and a caseless [^aeiou] does not
++       match "A", whereas a caseful version would. In a UTF mode, PCRE  always
++       understands  the  concept  of case for characters whose values are less
++       than 128, so caseless matching is always possible. For characters  with
++       higher  values,  the  concept  of case is supported if PCRE is compiled
++       with Unicode property support, but not otherwise.  If you want  to  use
++       caseless  matching in a UTF mode for characters 128 and above, you must
++       ensure that PCRE is compiled with Unicode property support as  well  as
+        with UTF support.
+ 
+-       Characters that might indicate line breaks are  never  treated  in  any
+-       special  way  when  matching  character  classes,  whatever line-ending
+-       sequence is in  use,  and  whatever  setting  of  the  PCRE_DOTALL  and
++       Characters  that  might  indicate  line breaks are never treated in any
++       special way  when  matching  character  classes,  whatever  line-ending
++       sequence  is  in  use,  and  whatever  setting  of  the PCRE_DOTALL and
+        PCRE_MULTILINE options is used. A class such as [^a] always matches one
+        of these characters.
+ 
+-       The minus (hyphen) character can be used to specify a range of  charac-
+-       ters  in  a  character  class.  For  example,  [d-m] matches any letter
+-       between d and m, inclusive. If a  minus  character  is  required  in  a
+-       class,  it  must  be  escaped  with a backslash or appear in a position
+-       where it cannot be interpreted as indicating a range, typically as  the
++       The  minus (hyphen) character can be used to specify a range of charac-
++       ters in a character  class.  For  example,  [d-m]  matches  any  letter
++       between  d  and  m,  inclusive.  If  a minus character is required in a
++       class, it must be escaped with a backslash  or  appear  in  a  position
++       where  it cannot be interpreted as indicating a range, typically as the
+        first or last character in the class, or immediately after a range. For
+-       example, [b-d-z] matches letters in the range b to d, a hyphen  charac-
++       example,  [b-d-z] matches letters in the range b to d, a hyphen charac-
+        ter, or z.
+ 
+        It is not possible to have the literal character "]" as the end charac-
+-       ter of a range. A pattern such as [W-]46] is interpreted as a class  of
+-       two  characters ("W" and "-") followed by a literal string "46]", so it
+-       would match "W46]" or "-46]". However, if the "]"  is  escaped  with  a
+-       backslash  it is interpreted as the end of range, so [W-\]46] is inter-
+-       preted as a class containing a range followed by two other  characters.
+-       The  octal or hexadecimal representation of "]" can also be used to end
++       ter  of a range. A pattern such as [W-]46] is interpreted as a class of
++       two characters ("W" and "-") followed by a literal string "46]", so  it
++       would  match  "W46]"  or  "-46]". However, if the "]" is escaped with a
++       backslash it is interpreted as the end of range, so [W-\]46] is  inter-
++       preted  as a class containing a range followed by two other characters.
++       The octal or hexadecimal representation of "]" can also be used to  end
+        a range.
+ 
+-       An error is generated if a POSIX character  class  (see  below)  or  an
+-       escape  sequence other than one that defines a single character appears
+-       at a point where a range ending character  is  expected.  For  example,
++       An  error  is  generated  if  a POSIX character class (see below) or an
++       escape sequence other than one that defines a single character  appears
++       at  a  point  where  a range ending character is expected. For example,
+        [z-\xff] is valid, but [A-\d] and [A-[:digit:]] are not.
+ 
+-       Ranges  operate in the collating sequence of character values. They can
+-       also  be  used  for  characters  specified  numerically,  for   example
+-       [\000-\037].  Ranges  can include any characters that are valid for the
++       Ranges operate in the collating sequence of character values. They  can
++       also   be  used  for  characters  specified  numerically,  for  example
++       [\000-\037]. Ranges can include any characters that are valid  for  the
+        current mode.
+ 
+        If a range that includes letters is used when caseless matching is set,
+        it matches the letters in either case. For example, [W-c] is equivalent
+-       to [][\\^_`wxyzabc], matched caselessly, and  in  a  non-UTF  mode,  if
+-       character  tables  for  a French locale are in use, [\xc8-\xcb] matches
+-       accented E characters in both cases. In UTF modes,  PCRE  supports  the
+-       concept  of  case for characters with values greater than 128 only when
++       to  [][\\^_`wxyzabc],  matched  caselessly,  and  in a non-UTF mode, if
++       character tables for a French locale are in  use,  [\xc8-\xcb]  matches
++       accented  E  characters  in both cases. In UTF modes, PCRE supports the
++       concept of case for characters with values greater than 128  only  when
+        it is compiled with Unicode property support.
+ 
+-       The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,  \V,
++       The  character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V,
+        \w, and \W may appear in a character class, and add the characters that
+-       they match to the class. For example, [\dABCDEF] matches any  hexadeci-
+-       mal  digit.  In  UTF modes, the PCRE_UCP option affects the meanings of
+-       \d, \s, \w and their upper case partners, just as  it  does  when  they
+-       appear  outside a character class, as described in the section entitled
++       they  match to the class. For example, [\dABCDEF] matches any hexadeci-
++       mal digit. In UTF modes, the PCRE_UCP option affects  the  meanings  of
++       \d,  \s,  \w  and  their upper case partners, just as it does when they
++       appear outside a character class, as described in the section  entitled
+        "Generic character types" above. The escape sequence \b has a different
+-       meaning  inside  a character class; it matches the backspace character.
+-       The sequences \B, \N, \R, and \X are not  special  inside  a  character
+-       class.  Like  any other unrecognized escape sequences, they are treated
+-       as the literal characters "B", "N", "R", and "X" by default, but  cause
++       meaning inside a character class; it matches the  backspace  character.
++       The  sequences  \B,  \N,  \R, and \X are not special inside a character
++       class. Like any other unrecognized escape sequences, they  are  treated
++       as  the literal characters "B", "N", "R", and "X" by default, but cause
+        an error if the PCRE_EXTRA option is set.
+ 
+-       A  circumflex  can  conveniently  be used with the upper case character
+-       types to specify a more restricted set of characters than the  matching
+-       lower  case  type.  For example, the class [^\W_] matches any letter or
++       A circumflex can conveniently be used with  the  upper  case  character
++       types  to specify a more restricted set of characters than the matching
++       lower case type.  For example, the class [^\W_] matches any  letter  or
+        digit, but not underscore, whereas [\w] includes underscore. A positive
+        character class should be read as "something OR something OR ..." and a
+        negative class as "NOT something AND NOT something AND NOT ...".
+ 
+-       The only metacharacters that are recognized in  character  classes  are
+-       backslash,  hyphen  (only  where  it can be interpreted as specifying a
+-       range), circumflex (only at the start), opening  square  bracket  (only
+-       when  it can be interpreted as introducing a POSIX class name, or for a
+-       special compatibility feature - see the next  two  sections),  and  the
++       The  only  metacharacters  that are recognized in character classes are
++       backslash, hyphen (only where it can be  interpreted  as  specifying  a
++       range),  circumflex  (only  at the start), opening square bracket (only
++       when it can be interpreted as introducing a POSIX class name, or for  a
++       special  compatibility  feature  -  see the next two sections), and the
+        terminating  closing  square  bracket.  However,  escaping  other  non-
+        alphanumeric characters does no harm.
+ 
+@@ -5837,7 +5861,7 @@ SQUARE BRACKETS AND CHARACTER CLASSES
+ POSIX CHARACTER CLASSES
+ 
+        Perl supports the POSIX notation for character classes. This uses names
+-       enclosed  by  [: and :] within the enclosing square brackets. PCRE also
++       enclosed by [: and :] within the enclosing square brackets.  PCRE  also
+        supports this notation. For example,
+ 
+          [01[:alpha:]%]
+@@ -5860,28 +5884,28 @@ POSIX CHARACTER CLASSES
+          word     "word" characters (same as \w)
+          xdigit   hexadecimal digits
+ 
+-       The  default  "space" characters are HT (9), LF (10), VT (11), FF (12),
+-       CR (13), and space (32). If locale-specific matching is  taking  place,
+-       the  list  of  space characters may be different; there may be fewer or
++       The default "space" characters are HT (9), LF (10), VT (11),  FF  (12),
++       CR  (13),  and space (32). If locale-specific matching is taking place,
++       the list of space characters may be different; there may  be  fewer  or
+        more of them. "Space" used to be different to \s, which did not include
+        VT, for Perl compatibility.  However, Perl changed at release 5.18, and
+-       PCRE followed at release 8.34.  "Space" and \s now match the  same  set
++       PCRE  followed  at release 8.34.  "Space" and \s now match the same set
+        of characters.
+ 
+-       The  name  "word"  is  a Perl extension, and "blank" is a GNU extension
+-       from Perl 5.8. Another Perl extension is negation, which  is  indicated
++       The name "word" is a Perl extension, and "blank"  is  a  GNU  extension
++       from  Perl  5.8. Another Perl extension is negation, which is indicated
+        by a ^ character after the colon. For example,
+ 
+          [12[:^digit:]]
+ 
+-       matches  "1", "2", or any non-digit. PCRE (and Perl) also recognize the
++       matches "1", "2", or any non-digit. PCRE (and Perl) also recognize  the
+        POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but
+        these are not supported, and an error is given if they are encountered.
+ 
+        By default, characters with values greater than 128 do not match any of
+-       the POSIX character classes. However, if the PCRE_UCP option is  passed
+-       to  pcre_compile(),  some  of  the  classes are changed so that Unicode
+-       character properties are used. This is achieved  by  replacing  certain
++       the  POSIX character classes. However, if the PCRE_UCP option is passed
++       to pcre_compile(), some of the classes  are  changed  so  that  Unicode
++       character  properties  are  used. This is achieved by replacing certain
+        POSIX classes by other sequences, as follows:
+ 
+          [:alnum:]  becomes  \p{Xan}
+@@ -5893,10 +5917,10 @@ POSIX CHARACTER CLASSES
+          [:upper:]  becomes  \p{Lu}
+          [:word:]   becomes  \p{Xwd}
+ 
+-       Negated  versions, such as [:^alpha:] use \P instead of \p. Three other
++       Negated versions, such as [:^alpha:] use \P instead of \p. Three  other
+        POSIX classes are handled specially in UCP mode:
+ 
+-       [:graph:] This matches characters that have glyphs that mark  the  page
++       [:graph:] This  matches  characters that have glyphs that mark the page
+                  when printed. In Unicode property terms, it matches all char-
+                  acters with the L, M, N, P, S, or Cf properties, except for:
+ 
+@@ -5905,58 +5929,58 @@ POSIX CHARACTER CLASSES
+                    U+2066 - U+2069  Various "isolate"s
+ 
+ 
+-       [:print:] This matches the same  characters  as  [:graph:]  plus  space
+-                 characters  that  are  not controls, that is, characters with
++       [:print:] This  matches  the  same  characters  as [:graph:] plus space
++                 characters that are not controls, that  is,  characters  with
+                  the Zs property.
+ 
+        [:punct:] This matches all characters that have the Unicode P (punctua-
+-                 tion)  property,  plus those characters whose code points are
++                 tion) property, plus those characters whose code  points  are
+                  less than 128 that have the S (Symbol) property.
+ 
+-       The other POSIX classes are unchanged, and match only  characters  with
++       The  other  POSIX classes are unchanged, and match only characters with
+        code points less than 128.
+ 
+ 
+ COMPATIBILITY FEATURE FOR WORD BOUNDARIES
+ 
+-       In  the POSIX.2 compliant library that was included in 4.4BSD Unix, the
+-       ugly syntax [[:<:]] and [[:>:]] is used for matching  "start  of  word"
++       In the POSIX.2 compliant library that was included in 4.4BSD Unix,  the
++       ugly  syntax  [[:<:]]  and [[:>:]] is used for matching "start of word"
+        and "end of word". PCRE treats these items as follows:
+ 
+          [[:<:]]  is converted to  \b(?=\w)
+          [[:>:]]  is converted to  \b(?<=\w)
+ 
+        Only these exact character sequences are recognized. A sequence such as
+-       [a[:<:]b] provokes error for an unrecognized  POSIX  class  name.  This
+-       support  is not compatible with Perl. It is provided to help migrations
++       [a[:<:]b]  provokes  error  for  an unrecognized POSIX class name. This
++       support is not compatible with Perl. It is provided to help  migrations
+        from other environments, and is best not used in any new patterns. Note
+-       that  \b matches at the start and the end of a word (see "Simple asser-
+-       tions" above), and in a Perl-style pattern the preceding  or  following
+-       character  normally  shows  which  is  wanted, without the need for the
+-       assertions that are used above in order to give exactly the  POSIX  be-
++       that \b matches at the start and the end of a word (see "Simple  asser-
++       tions"  above),  and in a Perl-style pattern the preceding or following
++       character normally shows which is wanted,  without  the  need  for  the
++       assertions  that  are used above in order to give exactly the POSIX be-
+        haviour.
+ 
+ 
+ VERTICAL BAR
+ 
+-       Vertical  bar characters are used to separate alternative patterns. For
++       Vertical bar characters are used to separate alternative patterns.  For
+        example, the pattern
+ 
+          gilbert|sullivan
+ 
+-       matches either "gilbert" or "sullivan". Any number of alternatives  may
+-       appear,  and  an  empty  alternative  is  permitted (matching the empty
++       matches  either "gilbert" or "sullivan". Any number of alternatives may
++       appear, and an empty  alternative  is  permitted  (matching  the  empty
+        string). The matching process tries each alternative in turn, from left
+-       to  right, and the first one that succeeds is used. If the alternatives
+-       are within a subpattern (defined below), "succeeds" means matching  the
++       to right, and the first one that succeeds is used. If the  alternatives
++       are  within a subpattern (defined below), "succeeds" means matching the
+        rest of the main pattern as well as the alternative in the subpattern.
+ 
+ 
+ INTERNAL OPTION SETTING
+ 
+-       The  settings  of  the  PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
+-       PCRE_EXTENDED options (which are Perl-compatible) can be  changed  from
+-       within  the  pattern  by  a  sequence  of  Perl option letters enclosed
++       The settings of the  PCRE_CASELESS,  PCRE_MULTILINE,  PCRE_DOTALL,  and
++       PCRE_EXTENDED  options  (which are Perl-compatible) can be changed from
++       within the pattern by  a  sequence  of  Perl  option  letters  enclosed
+        between "(?" and ")".  The option letters are
+ 
+          i  for PCRE_CASELESS
+@@ -5966,51 +5990,51 @@ INTERNAL OPTION SETTING
+ 
+        For example, (?im) sets caseless, multiline matching. It is also possi-
+        ble to unset these options by preceding the letter with a hyphen, and a
+-       combined setting and unsetting such as (?im-sx), which sets  PCRE_CASE-
+-       LESS  and PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED,
+-       is also permitted. If a  letter  appears  both  before  and  after  the
++       combined  setting and unsetting such as (?im-sx), which sets PCRE_CASE-
++       LESS and PCRE_MULTILINE while unsetting PCRE_DOTALL and  PCRE_EXTENDED,
++       is  also  permitted.  If  a  letter  appears  both before and after the
+        hyphen, the option is unset.
+ 
+-       The  PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA
+-       can be changed in the same way as the Perl-compatible options by  using
++       The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and  PCRE_EXTRA
++       can  be changed in the same way as the Perl-compatible options by using
+        the characters J, U and X respectively.
+ 
+-       When  one  of  these  option  changes occurs at top level (that is, not
+-       inside subpattern parentheses), the change applies to the remainder  of
++       When one of these option changes occurs at  top  level  (that  is,  not
++       inside  subpattern parentheses), the change applies to the remainder of
+        the pattern that follows. If the change is placed right at the start of
+        a pattern, PCRE extracts it into the global options (and it will there-
+        fore show up in data extracted by the pcre_fullinfo() function).
+ 
+-       An  option  change  within a subpattern (see below for a description of
+-       subpatterns) affects only that part of the subpattern that follows  it,
++       An option change within a subpattern (see below for  a  description  of
++       subpatterns)  affects only that part of the subpattern that follows it,
+        so
+ 
+          (a(?i)b)c
+ 
+        matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
+-       used).  By this means, options can be made to have  different  settings
+-       in  different parts of the pattern. Any changes made in one alternative
+-       do carry on into subsequent branches within the  same  subpattern.  For
++       used).   By  this means, options can be made to have different settings
++       in different parts of the pattern. Any changes made in one  alternative
++       do  carry  on  into subsequent branches within the same subpattern. For
+        example,
+ 
+          (a(?i)b|c)
+ 
+-       matches  "ab",  "aB",  "c",  and "C", even though when matching "C" the
+-       first branch is abandoned before the option setting.  This  is  because
+-       the  effects  of option settings happen at compile time. There would be
++       matches "ab", "aB", "c", and "C", even though  when  matching  "C"  the
++       first  branch  is  abandoned before the option setting. This is because
++       the effects of option settings happen at compile time. There  would  be
+        some very weird behaviour otherwise.
+ 
+-       Note: There are other PCRE-specific options that  can  be  set  by  the
+-       application  when  the  compiling  or matching functions are called. In
+-       some cases the pattern can contain special leading  sequences  such  as
+-       (*CRLF)  to  override  what  the  application  has set or what has been
+-       defaulted.  Details  are  given  in  the  section   entitled   "Newline
+-       sequences"  above.  There  are also the (*UTF8), (*UTF16),(*UTF32), and
+-       (*UCP) leading sequences that can be used to set UTF and Unicode  prop-
+-       erty  modes;  they are equivalent to setting the PCRE_UTF8, PCRE_UTF16,
+-       PCRE_UTF32 and the PCRE_UCP options, respectively. The (*UTF)  sequence
+-       is  a  generic version that can be used with any of the libraries. How-
+-       ever, the application can set the PCRE_NEVER_UTF  option,  which  locks
++       Note:  There  are  other  PCRE-specific  options that can be set by the
++       application when the compiling or matching  functions  are  called.  In
++       some  cases  the  pattern can contain special leading sequences such as
++       (*CRLF) to override what the application  has  set  or  what  has  been
++       defaulted.   Details   are  given  in  the  section  entitled  "Newline
++       sequences" above. There are also the  (*UTF8), (*UTF16), (*UTF32),  and
++       (*UCP)  leading sequences that can be used to set UTF and Unicode prop-
++       erty modes; they are equivalent to setting the  PCRE_UTF8,  PCRE_UTF16,
++       PCRE_UTF32  and the PCRE_UCP options, respectively. The (*UTF) sequence
++       is a generic version that can be used with any of the  libraries.  How-
++       ever,  the  application  can set the PCRE_NEVER_UTF option, which locks
+        out the use of the (*UTF) sequences.
+ 
+ 
+@@ -6023,18 +6047,18 @@ SUBPATTERNS
+ 
+          cat(aract|erpillar|)
+ 
+-       matches "cataract", "caterpillar", or "cat". Without  the  parentheses,
++       matches  "cataract",  "caterpillar", or "cat". Without the parentheses,
+        it would match "cataract", "erpillar" or an empty string.
+ 
+-       2.  It  sets  up  the  subpattern as a capturing subpattern. This means
+-       that, when the whole pattern  matches,  that  portion  of  the  subject
++       2. It sets up the subpattern as  a  capturing  subpattern.  This  means
++       that,  when  the  whole  pattern  matches,  that portion of the subject
+        string that matched the subpattern is passed back to the caller via the
+-       ovector argument of the matching function. (This applies  only  to  the
+-       traditional  matching functions; the DFA matching functions do not sup-
++       ovector  argument  of  the matching function. (This applies only to the
++       traditional matching functions; the DFA matching functions do not  sup-
+        port capturing.)
+ 
+        Opening parentheses are counted from left to right (starting from 1) to
+-       obtain  numbers  for  the  capturing  subpatterns.  For example, if the
++       obtain numbers for the  capturing  subpatterns.  For  example,  if  the
+        string "the red king" is matched against the pattern
+ 
+          the ((red|white) (king|queen))
+@@ -6042,12 +6066,12 @@ SUBPATTERNS
+        the captured substrings are "red king", "red", and "king", and are num-
+        bered 1, 2, and 3, respectively.
+ 
+-       The  fact  that  plain  parentheses  fulfil two functions is not always
+-       helpful.  There are often times when a grouping subpattern is  required
+-       without  a capturing requirement. If an opening parenthesis is followed
+-       by a question mark and a colon, the subpattern does not do any  captur-
+-       ing,  and  is  not  counted when computing the number of any subsequent
+-       capturing subpatterns. For example, if the string "the white queen"  is
++       The fact that plain parentheses fulfil  two  functions  is  not  always
++       helpful.   There are often times when a grouping subpattern is required
++       without a capturing requirement. If an opening parenthesis is  followed
++       by  a question mark and a colon, the subpattern does not do any captur-
++       ing, and is not counted when computing the  number  of  any  subsequent
++       capturing  subpatterns. For example, if the string "the white queen" is
+        matched against the pattern
+ 
+          the ((?:red|white) (king|queen))
+@@ -6055,37 +6079,37 @@ SUBPATTERNS
+        the captured substrings are "white queen" and "queen", and are numbered
+        1 and 2. The maximum number of capturing subpatterns is 65535.
+ 
+-       As a convenient shorthand, if any option settings are required  at  the
+-       start  of  a  non-capturing  subpattern,  the option letters may appear
++       As  a  convenient shorthand, if any option settings are required at the
++       start of a non-capturing subpattern,  the  option  letters  may  appear
+        between the "?" and the ":". Thus the two patterns
+ 
+          (?i:saturday|sunday)
+          (?:(?i)saturday|sunday)
+ 
+        match exactly the same set of strings. Because alternative branches are
+-       tried  from  left  to right, and options are not reset until the end of
+-       the subpattern is reached, an option setting in one branch does  affect
+-       subsequent  branches,  so  the above patterns match "SUNDAY" as well as
++       tried from left to right, and options are not reset until  the  end  of
++       the  subpattern is reached, an option setting in one branch does affect
++       subsequent branches, so the above patterns match "SUNDAY"  as  well  as
+        "Saturday".
+ 
+ 
+ DUPLICATE SUBPATTERN NUMBERS
+ 
+        Perl 5.10 introduced a feature whereby each alternative in a subpattern
+-       uses  the same numbers for its capturing parentheses. Such a subpattern
+-       starts with (?| and is itself a non-capturing subpattern. For  example,
++       uses the same numbers for its capturing parentheses. Such a  subpattern
++       starts  with (?| and is itself a non-capturing subpattern. For example,
+        consider this pattern:
+ 
+          (?|(Sat)ur|(Sun))day
+ 
+-       Because  the two alternatives are inside a (?| group, both sets of cap-
+-       turing parentheses are numbered one. Thus, when  the  pattern  matches,
+-       you  can  look  at captured substring number one, whichever alternative
+-       matched. This construct is useful when you want to  capture  part,  but
++       Because the two alternatives are inside a (?| group, both sets of  cap-
++       turing  parentheses  are  numbered one. Thus, when the pattern matches,
++       you can look at captured substring number  one,  whichever  alternative
++       matched.  This  construct  is useful when you want to capture part, but
+        not all, of one of a number of alternatives. Inside a (?| group, paren-
+-       theses are numbered as usual, but the number is reset at the  start  of
+-       each  branch.  The numbers of any capturing parentheses that follow the
+-       subpattern start after the highest number used in any branch. The  fol-
++       theses  are  numbered as usual, but the number is reset at the start of
++       each branch. The numbers of any capturing parentheses that  follow  the
++       subpattern  start after the highest number used in any branch. The fol-
+        lowing example is taken from the Perl documentation. The numbers under-
+        neath show in which buffer the captured content will be stored.
+ 
+@@ -6093,58 +6117,58 @@ DUPLICATE SUBPATTERN NUMBERS
+          / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+          # 1            2         2  3        2     3     4
+ 
+-       A back reference to a numbered subpattern uses the  most  recent  value
+-       that  is  set  for that number by any subpattern. The following pattern
++       A  back  reference  to a numbered subpattern uses the most recent value
++       that is set for that number by any subpattern.  The  following  pattern
+        matches "abcabc" or "defdef":
+ 
+          /(?|(abc)|(def))\1/
+ 
+-       In contrast, a subroutine call to a numbered subpattern  always  refers
+-       to  the  first  one in the pattern with the given number. The following
++       In  contrast,  a subroutine call to a numbered subpattern always refers
++       to the first one in the pattern with the given  number.  The  following
+        pattern matches "abcabc" or "defabc":
+ 
+          /(?|(abc)|(def))(?1)/
+ 
+-       If a condition test for a subpattern's having matched refers to a  non-
+-       unique  number, the test is true if any of the subpatterns of that num-
++       If  a condition test for a subpattern's having matched refers to a non-
++       unique number, the test is true if any of the subpatterns of that  num-
+        ber have matched.
+ 
+-       An alternative approach to using this "branch reset" feature is to  use
++       An  alternative approach to using this "branch reset" feature is to use
+        duplicate named subpatterns, as described in the next section.
+ 
+ 
+ NAMED SUBPATTERNS
+ 
+-       Identifying  capturing  parentheses  by number is simple, but it can be
+-       very hard to keep track of the numbers in complicated  regular  expres-
+-       sions.  Furthermore,  if  an  expression  is  modified, the numbers may
+-       change. To help with this difficulty, PCRE supports the naming of  sub-
++       Identifying capturing parentheses by number is simple, but  it  can  be
++       very  hard  to keep track of the numbers in complicated regular expres-
++       sions. Furthermore, if an  expression  is  modified,  the  numbers  may
++       change.  To help with this difficulty, PCRE supports the naming of sub-
+        patterns. This feature was not added to Perl until release 5.10. Python
+-       had the feature earlier, and PCRE introduced it at release  4.0,  using
+-       the  Python syntax. PCRE now supports both the Perl and the Python syn-
+-       tax. Perl allows identically numbered  subpatterns  to  have  different
++       had  the  feature earlier, and PCRE introduced it at release 4.0, using
++       the Python syntax. PCRE now supports both the Perl and the Python  syn-
++       tax.  Perl  allows  identically  numbered subpatterns to have different
+        names, but PCRE does not.
+ 
+-       In  PCRE,  a subpattern can be named in one of three ways: (?<name>...)
+-       or (?'name'...) as in Perl, or (?P<name>...) as in  Python.  References
+-       to  capturing parentheses from other parts of the pattern, such as back
+-       references, recursion, and conditions, can be made by name as  well  as
++       In PCRE, a subpattern can be named in one of three  ways:  (?<name>...)
++       or  (?'name'...)  as in Perl, or (?P<name>...) as in Python. References
++       to capturing parentheses from other parts of the pattern, such as  back
++       references,  recursion,  and conditions, can be made by name as well as
+        by number.
+ 
+-       Names  consist of up to 32 alphanumeric characters and underscores, but
+-       must start with a non-digit.  Named  capturing  parentheses  are  still
+-       allocated  numbers  as  well as names, exactly as if the names were not
+-       present. The PCRE API provides function calls for extracting the  name-
+-       to-number  translation  table  from a compiled pattern. There is also a
++       Names consist of up to 32 alphanumeric characters and underscores,  but
++       must  start  with  a  non-digit.  Named capturing parentheses are still
++       allocated numbers as well as names, exactly as if the  names  were  not
++       present.  The PCRE API provides function calls for extracting the name-
++       to-number translation table from a compiled pattern. There  is  also  a
+        convenience function for extracting a captured substring by name.
+ 
+-       By default, a name must be unique within a pattern, but it is  possible
++       By  default, a name must be unique within a pattern, but it is possible
+        to relax this constraint by setting the PCRE_DUPNAMES option at compile
+-       time. (Duplicate names are also always permitted for  subpatterns  with
+-       the  same  number, set up as described in the previous section.) Dupli-
+-       cate names can be useful for patterns where only one  instance  of  the
+-       named  parentheses  can  match. Suppose you want to match the name of a
+-       weekday, either as a 3-letter abbreviation or as the full name, and  in
++       time.  (Duplicate  names are also always permitted for subpatterns with
++       the same number, set up as described in the previous  section.)  Dupli-
++       cate  names  can  be useful for patterns where only one instance of the
++       named parentheses can match. Suppose you want to match the  name  of  a
++       weekday,  either as a 3-letter abbreviation or as the full name, and in
+        both cases you want to extract the abbreviation. This pattern (ignoring
+        the line breaks) does the job:
+ 
+@@ -6154,18 +6178,18 @@ NAMED SUBPATTERNS
+          (?<DN>Thu)(?:rsday)?|
+          (?<DN>Sat)(?:urday)?
+ 
+-       There are five capturing substrings, but only one is ever set  after  a
++       There  are  five capturing substrings, but only one is ever set after a
+        match.  (An alternative way of solving this problem is to use a "branch
+        reset" subpattern, as described in the previous section.)
+ 
+-       The convenience function for extracting the data by  name  returns  the
+-       substring  for  the first (and in this example, the only) subpattern of
+-       that name that matched. This saves searching  to  find  which  numbered
++       The  convenience  function  for extracting the data by name returns the
++       substring for the first (and in this example, the only)  subpattern  of
++       that  name  that  matched.  This saves searching to find which numbered
+        subpattern it was.
+ 
+-       If  you  make  a  back  reference to a non-unique named subpattern from
+-       elsewhere in the pattern, the subpatterns to which the name refers  are
+-       checked  in  the order in which they appear in the overall pattern. The
++       If you make a back reference to  a  non-unique  named  subpattern  from
++       elsewhere  in the pattern, the subpatterns to which the name refers are
++       checked in the order in which they appear in the overall  pattern.  The
+        first one that is set is used for the reference. For example, this pat-
+        tern matches both "foofoo" and "barbar" but not "foobar" or "barfoo":
+ 
+@@ -6173,29 +6197,29 @@ NAMED SUBPATTERNS
+ 
+ 
+        If you make a subroutine call to a non-unique named subpattern, the one
+-       that corresponds to the first occurrence of the name is  used.  In  the
++       that  corresponds  to  the first occurrence of the name is used. In the
+        absence of duplicate numbers (see the previous section) this is the one
+        with the lowest number.
+ 
+        If you use a named reference in a condition test (see the section about
+        conditions below), either to check whether a subpattern has matched, or
+-       to check for recursion, all subpatterns with the same name are  tested.
+-       If  the condition is true for any one of them, the overall condition is
+-       true. This is the same behaviour as  testing  by  number.  For  further
+-       details  of  the  interfaces  for  handling  named subpatterns, see the
++       to  check for recursion, all subpatterns with the same name are tested.
++       If the condition is true for any one of them, the overall condition  is
++       true.  This  is  the  same  behaviour as testing by number. For further
++       details of the interfaces  for  handling  named  subpatterns,  see  the
+        pcreapi documentation.
+ 
+        Warning: You cannot use different names to distinguish between two sub-
+-       patterns  with  the same number because PCRE uses only the numbers when
++       patterns with the same number because PCRE uses only the  numbers  when
+        matching. For this reason, an error is given at compile time if differ-
+-       ent  names  are given to subpatterns with the same number. However, you
++       ent names are given to subpatterns with the same number.  However,  you
+        can always give the same name to subpatterns with the same number, even
+        when PCRE_DUPNAMES is not set.
+ 
+ 
+ REPETITION
+ 
+-       Repetition  is  specified  by  quantifiers, which can follow any of the
++       Repetition is specified by quantifiers, which can  follow  any  of  the
+        following items:
+ 
+          a literal data character
+@@ -6209,17 +6233,17 @@ REPETITION
+          a parenthesized subpattern (including assertions)
+          a subroutine call to a subpattern (recursive or otherwise)
+ 
+-       The general repetition quantifier specifies a minimum and maximum  num-
+-       ber  of  permitted matches, by giving the two numbers in curly brackets
+-       (braces), separated by a comma. The numbers must be  less  than  65536,
++       The  general repetition quantifier specifies a minimum and maximum num-
++       ber of permitted matches, by giving the two numbers in  curly  brackets
++       (braces),  separated  by  a comma. The numbers must be less than 65536,
+        and the first must be less than or equal to the second. For example:
+ 
+          z{2,4}
+ 
+-       matches  "zz",  "zzz",  or  "zzzz". A closing brace on its own is not a
+-       special character. If the second number is omitted, but  the  comma  is
+-       present,  there  is  no upper limit; if the second number and the comma
+-       are both omitted, the quantifier specifies an exact number of  required
++       matches "zz", "zzz", or "zzzz". A closing brace on its  own  is  not  a
++       special  character.  If  the second number is omitted, but the comma is
++       present, there is no upper limit; if the second number  and  the  comma
++       are  both omitted, the quantifier specifies an exact number of required
+        matches. Thus
+ 
+          [aeiou]{3,}
+@@ -6228,50 +6252,50 @@ REPETITION
+ 
+          \d{8}
+ 
+-       matches  exactly  8  digits. An opening curly bracket that appears in a
+-       position where a quantifier is not allowed, or one that does not  match
+-       the  syntax of a quantifier, is taken as a literal character. For exam-
++       matches exactly 8 digits. An opening curly bracket that  appears  in  a
++       position  where a quantifier is not allowed, or one that does not match
++       the syntax of a quantifier, is taken as a literal character. For  exam-
+        ple, {,6} is not a quantifier, but a literal string of four characters.
+ 
+        In UTF modes, quantifiers apply to characters rather than to individual
+-       data  units. Thus, for example, \x{100}{2} matches two characters, each
++       data units. Thus, for example, \x{100}{2} matches two characters,  each
+        of which is represented by a two-byte sequence in a UTF-8 string. Simi-
+-       larly,  \X{3} matches three Unicode extended grapheme clusters, each of
+-       which may be several data units long (and  they  may  be  of  different
++       larly, \X{3} matches three Unicode extended grapheme clusters, each  of
++       which  may  be  several  data  units long (and they may be of different
+        lengths).
+ 
+        The quantifier {0} is permitted, causing the expression to behave as if
+        the previous item and the quantifier were not present. This may be use-
+-       ful  for  subpatterns that are referenced as subroutines from elsewhere
++       ful for subpatterns that are referenced as subroutines  from  elsewhere
+        in the pattern (but see also the section entitled "Defining subpatterns
+-       for  use  by  reference only" below). Items other than subpatterns that
++       for use by reference only" below). Items other  than  subpatterns  that
+        have a {0} quantifier are omitted from the compiled pattern.
+ 
+-       For convenience, the three most common quantifiers have  single-charac-
++       For  convenience, the three most common quantifiers have single-charac-
+        ter abbreviations:
+ 
+          *    is equivalent to {0,}
+          +    is equivalent to {1,}
+          ?    is equivalent to {0,1}
+ 
+-       It  is  possible  to construct infinite loops by following a subpattern
++       It is possible to construct infinite loops by  following  a  subpattern
+        that can match no characters with a quantifier that has no upper limit,
+        for example:
+ 
+          (a?)*
+ 
+        Earlier versions of Perl and PCRE used to give an error at compile time
+-       for such patterns. However, because there are cases where this  can  be
+-       useful,  such  patterns  are now accepted, but if any repetition of the
+-       subpattern does in fact match no characters, the loop is forcibly  bro-
++       for  such  patterns. However, because there are cases where this can be
++       useful, such patterns are now accepted, but if any  repetition  of  the
++       subpattern  does in fact match no characters, the loop is forcibly bro-
+        ken.
+ 
+-       By  default,  the quantifiers are "greedy", that is, they match as much
+-       as possible (up to the maximum  number  of  permitted  times),  without
+-       causing  the  rest of the pattern to fail. The classic example of where
++       By default, the quantifiers are "greedy", that is, they match  as  much
++       as  possible  (up  to  the  maximum number of permitted times), without
++       causing the rest of the pattern to fail. The classic example  of  where
+        this gives problems is in trying to match comments in C programs. These
+-       appear  between  /*  and  */ and within the comment, individual * and /
+-       characters may appear. An attempt to match C comments by  applying  the
++       appear between /* and */ and within the comment,  individual  *  and  /
++       characters  may  appear. An attempt to match C comments by applying the
+        pattern
+ 
+          /\*.*\*/
+@@ -6280,19 +6304,19 @@ REPETITION
+ 
+          /* first comment */  not comment  /* second comment */
+ 
+-       fails,  because it matches the entire string owing to the greediness of
++       fails, because it matches the entire string owing to the greediness  of
+        the .*  item.
+ 
+-       However, if a quantifier is followed by a question mark, it  ceases  to
++       However,  if  a quantifier is followed by a question mark, it ceases to
+        be greedy, and instead matches the minimum number of times possible, so
+        the pattern
+ 
+          /\*.*?\*/
+ 
+-       does the right thing with the C comments. The meaning  of  the  various
+-       quantifiers  is  not  otherwise  changed,  just the preferred number of
+-       matches.  Do not confuse this use of question mark with its  use  as  a
+-       quantifier  in its own right. Because it has two uses, it can sometimes
++       does  the  right  thing with the C comments. The meaning of the various
++       quantifiers is not otherwise changed,  just  the  preferred  number  of
++       matches.   Do  not  confuse this use of question mark with its use as a
++       quantifier in its own right. Because it has two uses, it can  sometimes
+        appear doubled, as in
+ 
+          \d??\d
+@@ -6300,45 +6324,45 @@ REPETITION
+        which matches one digit by preference, but can match two if that is the
+        only way the rest of the pattern matches.
+ 
+-       If  the PCRE_UNGREEDY option is set (an option that is not available in
+-       Perl), the quantifiers are not greedy by default, but  individual  ones
+-       can  be  made  greedy  by following them with a question mark. In other
++       If the PCRE_UNGREEDY option is set (an option that is not available  in
++       Perl),  the  quantifiers are not greedy by default, but individual ones
++       can be made greedy by following them with a  question  mark.  In  other
+        words, it inverts the default behaviour.
+ 
+-       When a parenthesized subpattern is quantified  with  a  minimum  repeat
+-       count  that is greater than 1 or with a limited maximum, more memory is
+-       required for the compiled pattern, in proportion to  the  size  of  the
++       When  a  parenthesized  subpattern  is quantified with a minimum repeat
++       count that is greater than 1 or with a limited maximum, more memory  is
++       required  for  the  compiled  pattern, in proportion to the size of the
+        minimum or maximum.
+ 
+        If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equiv-
+-       alent to Perl's /s) is set, thus allowing the dot  to  match  newlines,
+-       the  pattern  is  implicitly anchored, because whatever follows will be
+-       tried against every character position in the subject string, so  there
+-       is  no  point  in  retrying the overall match at any position after the
+-       first. PCRE normally treats such a pattern as though it  were  preceded
++       alent  to  Perl's  /s) is set, thus allowing the dot to match newlines,
++       the pattern is implicitly anchored, because whatever  follows  will  be
++       tried  against every character position in the subject string, so there
++       is no point in retrying the overall match at  any  position  after  the
++       first.  PCRE  normally treats such a pattern as though it were preceded
+        by \A.
+ 
+-       In  cases  where  it  is known that the subject string contains no new-
+-       lines, it is worth setting PCRE_DOTALL in order to  obtain  this  opti-
++       In cases where it is known that the subject  string  contains  no  new-
++       lines,  it  is  worth setting PCRE_DOTALL in order to obtain this opti-
+        mization, or alternatively using ^ to indicate anchoring explicitly.
+ 
+-       However,  there  are  some cases where the optimization cannot be used.
++       However, there are some cases where the optimization  cannot  be  used.
+        When .*  is inside capturing parentheses that are the subject of a back
+        reference elsewhere in the pattern, a match at the start may fail where
+        a later one succeeds. Consider, for example:
+ 
+          (.*)abc\1
+ 
+-       If the subject is "xyz123abc123" the match point is the fourth  charac-
++       If  the subject is "xyz123abc123" the match point is the fourth charac-
+        ter. For this reason, such a pattern is not implicitly anchored.
+ 
+-       Another  case where implicit anchoring is not applied is when the lead-
+-       ing .* is inside an atomic group. Once again, a match at the start  may
++       Another case where implicit anchoring is not applied is when the  lead-
++       ing  .* is inside an atomic group. Once again, a match at the start may
+        fail where a later one succeeds. Consider this pattern:
+ 
+          (?>.*?a)b
+ 
+-       It  matches "ab" in the subject "aab". The use of the backtracking con-
++       It matches "ab" in the subject "aab". The use of the backtracking  con-
+        trol verbs (*PRUNE) and (*SKIP) also disable this optimization.
+ 
+        When a capturing subpattern is repeated, the value captured is the sub-
+@@ -6347,8 +6371,8 @@ REPETITION
+          (tweedle[dume]{3}\s*)+
+ 
+        has matched "tweedledum tweedledee" the value of the captured substring
+-       is "tweedledee". However, if there are  nested  capturing  subpatterns,
+-       the  corresponding captured values may have been set in previous itera-
++       is  "tweedledee".  However,  if there are nested capturing subpatterns,
++       the corresponding captured values may have been set in previous  itera-
+        tions. For example, after
+ 
+          /(a|(b))+/
+@@ -6358,53 +6382,53 @@ REPETITION
+ 
+ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
+ 
+-       With both maximizing ("greedy") and minimizing ("ungreedy"  or  "lazy")
+-       repetition,  failure  of what follows normally causes the repeated item
+-       to be re-evaluated to see if a different number of repeats  allows  the
+-       rest  of  the pattern to match. Sometimes it is useful to prevent this,
+-       either to change the nature of the match, or to cause it  fail  earlier
+-       than  it otherwise might, when the author of the pattern knows there is
++       With  both  maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
++       repetition, failure of what follows normally causes the  repeated  item
++       to  be  re-evaluated to see if a different number of repeats allows the
++       rest of the pattern to match. Sometimes it is useful to  prevent  this,
++       either to change the nature of the match, or to cause it to fail earlier
++       than it otherwise might, when the author of the pattern knows there  is
+        no point in carrying on.
+ 
+-       Consider, for example, the pattern \d+foo when applied to  the  subject
++       Consider,  for  example, the pattern \d+foo when applied to the subject
+        line
+ 
+          123456bar
+ 
+        After matching all 6 digits and then failing to match "foo", the normal
+-       action of the matcher is to try again with only 5 digits  matching  the
+-       \d+  item,  and  then  with  4,  and  so on, before ultimately failing.
+-       "Atomic grouping" (a term taken from Jeffrey  Friedl's  book)  provides
+-       the  means for specifying that once a subpattern has matched, it is not
++       action  of  the matcher is to try again with only 5 digits matching the
++       \d+ item, and then with  4,  and  so  on,  before  ultimately  failing.
++       "Atomic  grouping"  (a  term taken from Jeffrey Friedl's book) provides
++       the means for specifying that once a subpattern has matched, it is  not
+        to be re-evaluated in this way.
+ 
+-       If we use atomic grouping for the previous example, the  matcher  gives
+-       up  immediately  on failing to match "foo" the first time. The notation
++       If  we  use atomic grouping for the previous example, the matcher gives
++       up immediately on failing to match "foo" the first time.  The  notation
+        is a kind of special parenthesis, starting with (?> as in this example:
+ 
+          (?>\d+)foo
+ 
+-       This kind of parenthesis "locks up" the  part of the  pattern  it  con-
+-       tains  once  it  has matched, and a failure further into the pattern is
+-       prevented from backtracking into it. Backtracking past it  to  previous
++       This  kind  of  parenthesis "locks up" the  part of the pattern it con-
++       tains once it has matched, and a failure further into  the  pattern  is
++       prevented  from  backtracking into it. Backtracking past it to previous
+        items, however, works as normal.
+ 
+-       An  alternative  description  is that a subpattern of this type matches
+-       the string of characters that an  identical  standalone  pattern  would
++       An alternative description is that a subpattern of  this  type  matches
++       the  string  of  characters  that an identical standalone pattern would
+        match, if anchored at the current point in the subject string.
+ 
+        Atomic grouping subpatterns are not capturing subpatterns. Simple cases
+        such as the above example can be thought of as a maximizing repeat that
+-       must  swallow  everything  it can. So, while both \d+ and \d+? are pre-
+-       pared to adjust the number of digits they match in order  to  make  the
++       must swallow everything it can. So, while both \d+ and  \d+?  are  pre-
++       pared  to  adjust  the number of digits they match in order to make the
+        rest of the pattern match, (?>\d+) can only match an entire sequence of
+        digits.
+ 
+-       Atomic groups in general can of course contain arbitrarily  complicated
+-       subpatterns,  and  can  be  nested. However, when the subpattern for an
++       Atomic  groups in general can of course contain arbitrarily complicated
++       subpatterns, and can be nested. However, when  the  subpattern  for  an
+        atomic group is just a single repeated item, as in the example above, a
+-       simpler  notation,  called  a "possessive quantifier" can be used. This
+-       consists of an additional + character  following  a  quantifier.  Using
++       simpler notation, called a "possessive quantifier" can  be  used.  This
++       consists  of  an  additional  + character following a quantifier. Using
+        this notation, the previous example can be rewritten as
+ 
+          \d++foo
+@@ -6414,45 +6438,45 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS
+ 
+          (abc|xyz){2,3}+
+ 
+-       Possessive  quantifiers  are  always  greedy;  the   setting   of   the
++       Possessive   quantifiers   are   always  greedy;  the  setting  of  the
+        PCRE_UNGREEDY option is ignored. They are a convenient notation for the
+-       simpler forms of atomic group. However, there is no difference  in  the
+-       meaning  of  a  possessive  quantifier and the equivalent atomic group,
+-       though there may be a performance  difference;  possessive  quantifiers
++       simpler  forms  of atomic group. However, there is no difference in the
++       meaning of a possessive quantifier and  the  equivalent  atomic  group,
++       though  there  may  be a performance difference; possessive quantifiers
+        should be slightly faster.
+ 
+-       The  possessive  quantifier syntax is an extension to the Perl 5.8 syn-
+-       tax.  Jeffrey Friedl originated the idea (and the name)  in  the  first
++       The possessive quantifier syntax is an extension to the Perl  5.8  syn-
++       tax.   Jeffrey  Friedl  originated the idea (and the name) in the first
+        edition of his book. Mike McCloskey liked it, so implemented it when he
+-       built Sun's Java package, and PCRE copied it from there. It  ultimately
++       built  Sun's Java package, and PCRE copied it from there. It ultimately
+        found its way into Perl at release 5.10.
+ 
+        PCRE has an optimization that automatically "possessifies" certain sim-
+-       ple pattern constructs. For example, the sequence  A+B  is  treated  as
+-       A++B  because  there is no point in backtracking into a sequence of A's
++       ple  pattern  constructs.  For  example, the sequence A+B is treated as
++       A++B because there is no point in backtracking into a sequence  of  A's
+        when B must follow.
+ 
+-       When a pattern contains an unlimited repeat inside  a  subpattern  that
+-       can  itself  be  repeated  an  unlimited number of times, the use of an
+-       atomic group is the only way to avoid some  failing  matches  taking  a
++       When  a  pattern  contains an unlimited repeat inside a subpattern that
++       can itself be repeated an unlimited number of  times,  the  use  of  an
++       atomic  group  is  the  only way to avoid some failing matches taking a
+        very long time indeed. The pattern
+ 
+          (\D+|<\d+>)*[!?]
+ 
+-       matches  an  unlimited number of substrings that either consist of non-
+-       digits, or digits enclosed in <>, followed by either ! or  ?.  When  it
++       matches an unlimited number of substrings that either consist  of  non-
++       digits,  or  digits  enclosed in <>, followed by either ! or ?. When it
+        matches, it runs quickly. However, if it is applied to
+ 
+          aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ 
+-       it  takes  a  long  time  before reporting failure. This is because the
+-       string can be divided between the internal \D+ repeat and the  external
+-       *  repeat  in  a  large  number of ways, and all have to be tried. (The
+-       example uses [!?] rather than a single character at  the  end,  because
+-       both  PCRE  and  Perl have an optimization that allows for fast failure
+-       when a single character is used. They remember the last single  charac-
+-       ter  that  is required for a match, and fail early if it is not present
+-       in the string.) If the pattern is changed so that  it  uses  an  atomic
++       it takes a long time before reporting  failure.  This  is  because  the
++       string  can be divided between the internal \D+ repeat and the external
++       * repeat in a large number of ways, and all  have  to  be  tried.  (The
++       example  uses  [!?]  rather than a single character at the end, because
++       both PCRE and Perl have an optimization that allows  for  fast  failure
++       when  a single character is used. They remember the last single charac-
++       ter that is required for a match, and fail early if it is  not  present
++       in  the  string.)  If  the pattern is changed so that it uses an atomic
+        group, like this:
+ 
+          ((?>\D+)|<\d+>)*[!?]
+@@ -6464,28 +6488,28 @@ BACK REFERENCES
+ 
+        Outside a character class, a backslash followed by a digit greater than
+        0 (and possibly further digits) is a back reference to a capturing sub-
+-       pattern  earlier  (that is, to its left) in the pattern, provided there
++       pattern earlier (that is, to its left) in the pattern,  provided  there
+        have been that many previous capturing left parentheses.
+ 
+        However, if the decimal number following the backslash is less than 10,
+-       it  is  always  taken  as a back reference, and causes an error only if
+-       there are not that many capturing left parentheses in the  entire  pat-
+-       tern.  In  other words, the parentheses that are referenced need not be
+-       to the left of the reference for numbers less than 10. A "forward  back
+-       reference"  of  this  type can make sense when a repetition is involved
+-       and the subpattern to the right has participated in an  earlier  itera-
++       it is always taken as a back reference, and causes  an  error  only  if
++       there  are  not that many capturing left parentheses in the entire pat-
++       tern. In other words, the parentheses that are referenced need  not  be
++       to  the left of the reference for numbers less than 10. A "forward back
++       reference" of this type can make sense when a  repetition  is  involved
++       and  the  subpattern to the right has participated in an earlier itera-
+        tion.
+ 
+-       It  is  not  possible to have a numerical "forward back reference" to a
+-       subpattern whose number is 10 or  more  using  this  syntax  because  a
+-       sequence  such  as  \50 is interpreted as a character defined in octal.
++       It is not possible to have a numerical "forward back  reference"  to  a
++       subpattern  whose  number  is  10  or  more using this syntax because a
++       sequence such as \50 is interpreted as a character  defined  in  octal.
+        See the subsection entitled "Non-printing characters" above for further
+-       details  of  the  handling of digits following a backslash. There is no
+-       such problem when named parentheses are used. A back reference  to  any
++       details of the handling of digits following a backslash.  There  is  no
++       such  problem  when named parentheses are used. A back reference to any
+        subpattern is possible using named parentheses (see below).
+ 
+-       Another  way  of  avoiding  the ambiguity inherent in the use of digits
+-       following a backslash is to use the \g  escape  sequence.  This  escape
++       Another way of avoiding the ambiguity inherent in  the  use  of  digits
++       following  a  backslash  is  to use the \g escape sequence. This escape
+        must be followed by an unsigned number or a negative number, optionally
+        enclosed in braces. These examples are all identical:
+ 
+@@ -6493,7 +6517,7 @@ BACK REFERENCES
+          (ring), \g1
+          (ring), \g{1}
+ 
+-       An unsigned number specifies an absolute reference without the  ambigu-
++       An  unsigned number specifies an absolute reference without the ambigu-
+        ity that is present in the older syntax. It is also useful when literal
+        digits follow the reference. A negative number is a relative reference.
+        Consider this example:
+@@ -6502,33 +6526,33 @@ BACK REFERENCES
+ 
+        The sequence \g{-1} is a reference to the most recently started captur-
+        ing subpattern before \g, that is, is it equivalent to \2 in this exam-
+-       ple.   Similarly, \g{-2} would be equivalent to \1. The use of relative
+-       references can be helpful in long patterns, and also in  patterns  that
+-       are  created  by  joining  together  fragments  that contain references
++       ple.  Similarly, \g{-2} would be equivalent to \1. The use of  relative
++       references  can  be helpful in long patterns, and also in patterns that
++       are created by  joining  together  fragments  that  contain  references
+        within themselves.
+ 
+-       A back reference matches whatever actually matched the  capturing  sub-
+-       pattern  in  the  current subject string, rather than anything matching
++       A  back  reference matches whatever actually matched the capturing sub-
++       pattern in the current subject string, rather  than  anything  matching
+        the subpattern itself (see "Subpatterns as subroutines" below for a way
+        of doing that). So the pattern
+ 
+          (sens|respons)e and \1ibility
+ 
+-       matches  "sense and sensibility" and "response and responsibility", but
+-       not "sense and responsibility". If caseful matching is in force at  the
+-       time  of the back reference, the case of letters is relevant. For exam-
++       matches "sense and sensibility" and "response and responsibility",  but
++       not  "sense and responsibility". If caseful matching is in force at the
++       time of the back reference, the case of letters is relevant. For  exam-
+        ple,
+ 
+          ((?i)rah)\s+\1
+ 
+-       matches "rah rah" and "RAH RAH", but not "RAH  rah",  even  though  the
++       matches  "rah  rah"  and  "RAH RAH", but not "RAH rah", even though the
+        original capturing subpattern is matched caselessly.
+ 
+-       There  are  several  different ways of writing back references to named
+-       subpatterns. The .NET syntax \k{name} and the Perl syntax  \k<name>  or
+-       \k'name'  are supported, as is the Python syntax (?P=name). Perl 5.10's
++       There are several different ways of writing back  references  to  named
++       subpatterns.  The  .NET syntax \k{name} and the Perl syntax \k<name> or
++       \k'name' are supported, as is the Python syntax (?P=name). Perl  5.10's
+        unified back reference syntax, in which \g can be used for both numeric
+-       and  named  references,  is  also supported. We could rewrite the above
++       and named references, is also supported. We  could  rewrite  the  above
+        example in any of the following ways:
+ 
+          (?<p1>(?i)rah)\s+\k<p1>
+@@ -6536,84 +6560,84 @@ BACK REFERENCES
+          (?P<p1>(?i)rah)\s+(?P=p1)
+          (?<p1>(?i)rah)\s+\g{p1}
+ 
+-       A subpattern that is referenced by  name  may  appear  in  the  pattern
++       A  subpattern  that  is  referenced  by  name may appear in the pattern
+        before or after the reference.
+ 
+-       There  may be more than one back reference to the same subpattern. If a
+-       subpattern has not actually been used in a particular match,  any  back
++       There may be more than one back reference to the same subpattern. If  a
++       subpattern  has  not actually been used in a particular match, any back
+        references to it always fail by default. For example, the pattern
+ 
+          (a|(bc))\2
+ 
+-       always  fails  if  it starts to match "a" rather than "bc". However, if
++       always fails if it starts to match "a" rather than  "bc".  However,  if
+        the PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back refer-
+        ence to an unset value matches an empty string.
+ 
+-       Because  there may be many capturing parentheses in a pattern, all dig-
+-       its following a backslash are taken as part of a potential back  refer-
+-       ence  number.   If  the  pattern continues with a digit character, some
+-       delimiter must  be  used  to  terminate  the  back  reference.  If  the
+-       PCRE_EXTENDED  option  is  set, this can be white space. Otherwise, the
++       Because there may be many capturing parentheses in a pattern, all  dig-
++       its  following a backslash are taken as part of a potential back refer-
++       ence number.  If the pattern continues with  a  digit  character,  some
++       delimiter  must  be  used  to  terminate  the  back  reference.  If the
++       PCRE_EXTENDED option is set, this can be white  space.  Otherwise,  the
+        \g{ syntax or an empty comment (see "Comments" below) can be used.
+ 
+    Recursive back references
+ 
+-       A back reference that occurs inside the parentheses to which it  refers
+-       fails  when  the subpattern is first used, so, for example, (a\1) never
+-       matches.  However, such references can be useful inside  repeated  sub-
++       A  back reference that occurs inside the parentheses to which it refers
++       fails when the subpattern is first used, so, for example,  (a\1)  never
++       matches.   However,  such references can be useful inside repeated sub-
+        patterns. For example, the pattern
+ 
+          (a|b\1)+
+ 
+        matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
+-       ation of the subpattern,  the  back  reference  matches  the  character
+-       string  corresponding  to  the previous iteration. In order for this to
+-       work, the pattern must be such that the first iteration does  not  need
+-       to  match the back reference. This can be done using alternation, as in
++       ation  of  the  subpattern,  the  back  reference matches the character
++       string corresponding to the previous iteration. In order  for  this  to
++       work,  the  pattern must be such that the first iteration does not need
++       to match the back reference. This can be done using alternation, as  in
+        the example above, or by a quantifier with a minimum of zero.
+ 
+-       Back references of this type cause the group that they reference to  be
+-       treated  as  an atomic group.  Once the whole group has been matched, a
+-       subsequent matching failure cannot cause backtracking into  the  middle
++       Back  references of this type cause the group that they reference to be
++       treated as an atomic group.  Once the whole group has been  matched,  a
++       subsequent  matching  failure cannot cause backtracking into the middle
+        of the group.
+ 
+ 
+ ASSERTIONS
+ 
+-       An  assertion  is  a  test on the characters following or preceding the
+-       current matching point that does not actually consume  any  characters.
+-       The  simple  assertions  coded  as  \b, \B, \A, \G, \Z, \z, ^ and $ are
++       An assertion is a test on the characters  following  or  preceding  the
++       current  matching  point that does not actually consume any characters.
++       The simple assertions coded as \b, \B, \A, \G, \Z,  \z,  ^  and  $  are
+        described above.
+ 
+-       More complicated assertions are coded as  subpatterns.  There  are  two
+-       kinds:  those  that  look  ahead of the current position in the subject
+-       string, and those that look  behind  it.  An  assertion  subpattern  is
+-       matched  in  the  normal way, except that it does not cause the current
++       More  complicated  assertions  are  coded as subpatterns. There are two
++       kinds: those that look ahead of the current  position  in  the  subject
++       string,  and  those  that  look  behind  it. An assertion subpattern is
++       matched in the normal way, except that it does not  cause  the  current
+        matching position to be changed.
+ 
+-       Assertion subpatterns are not capturing subpatterns. If such an  asser-
+-       tion  contains  capturing  subpatterns within it, these are counted for
+-       the purposes of numbering the capturing subpatterns in the  whole  pat-
+-       tern.  However,  substring  capturing  is carried out only for positive
++       Assertion  subpatterns are not capturing subpatterns. If such an asser-
++       tion contains capturing subpatterns within it, these  are  counted  for
++       the  purposes  of numbering the capturing subpatterns in the whole pat-
++       tern. However, substring capturing is carried  out  only  for  positive
+        assertions. (Perl sometimes, but not always, does do capturing in nega-
+        tive assertions.)
+ 
+-       For  compatibility  with  Perl,  assertion subpatterns may be repeated;
+-       though it makes no sense to assert the same thing  several  times,  the
+-       side  effect  of  capturing  parentheses may occasionally be useful. In
++       For compatibility with Perl, assertion  subpatterns  may  be  repeated;
++       though  it  makes  no sense to assert the same thing several times, the
++       side effect of capturing parentheses may  occasionally  be  useful.  In
+        practice, there only three cases:
+ 
+-       (1) If the quantifier is {0}, the  assertion  is  never  obeyed  during
+-       matching.   However,  it  may  contain internal capturing parenthesized
++       (1)  If  the  quantifier  is  {0}, the assertion is never obeyed during
++       matching.  However, it may  contain  internal  capturing  parenthesized
+        groups that are called from elsewhere via the subroutine mechanism.
+ 
+-       (2) If quantifier is {0,n} where n is greater than zero, it is  treated
+-       as  if  it  were  {0,1}.  At run time, the rest of the pattern match is
++       (2) If the quantifier is {0,n} where n is greater than zero, it is treated
++       as if it were {0,1}. At run time, the rest  of  the  pattern  match  is
+        tried with and without the assertion, the order depending on the greed-
+        iness of the quantifier.
+ 
+-       (3)  If  the minimum repetition is greater than zero, the quantifier is
+-       ignored.  The assertion is obeyed just  once  when  encountered  during
++       (3) If the minimum repetition is greater than zero, the  quantifier  is
++       ignored.   The  assertion  is  obeyed just once when encountered during
+        matching.
+ 
+    Lookahead assertions
+@@ -6623,38 +6647,38 @@ ASSERTIONS
+ 
+          \w+(?=;)
+ 
+-       matches a word followed by a semicolon, but does not include the  semi-
++       matches  a word followed by a semicolon, but does not include the semi-
+        colon in the match, and
+ 
+          foo(?!bar)
+ 
+-       matches  any  occurrence  of  "foo" that is not followed by "bar". Note
++       matches any occurrence of "foo" that is not  followed  by  "bar".  Note
+        that the apparently similar pattern
+ 
+          (?!foo)bar
+ 
+-       does not find an occurrence of "bar"  that  is  preceded  by  something
+-       other  than "foo"; it finds any occurrence of "bar" whatsoever, because
++       does  not  find  an  occurrence  of "bar" that is preceded by something
++       other than "foo"; it finds any occurrence of "bar" whatsoever,  because
+        the assertion (?!foo) is always true when the next three characters are
+        "bar". A lookbehind assertion is needed to achieve the other effect.
+ 
+        If you want to force a matching failure at some point in a pattern, the
+-       most convenient way to do it is  with  (?!)  because  an  empty  string
+-       always  matches, so an assertion that requires there not to be an empty
++       most  convenient  way  to  do  it  is with (?!) because an empty string
++       always matches, so an assertion that requires there not to be an  empty
+        string must always fail.  The backtracking control verb (*FAIL) or (*F)
+        is a synonym for (?!).
+ 
+    Lookbehind assertions
+ 
+-       Lookbehind  assertions start with (?<= for positive assertions and (?<!
++       Lookbehind assertions start with (?<= for positive assertions and  (?<!
+        for negative assertions. For example,
+ 
+          (?<!foo)bar
+ 
+-       does find an occurrence of "bar" that is not  preceded  by  "foo".  The
+-       contents  of  a  lookbehind  assertion are restricted such that all the
++       does  find  an  occurrence  of "bar" that is not preceded by "foo". The
++       contents of a lookbehind assertion are restricted  such  that  all  the
+        strings it matches must have a fixed length. However, if there are sev-
+-       eral  top-level  alternatives,  they  do  not all have to have the same
++       eral top-level alternatives, they do not all  have  to  have  the  same
+        fixed length. Thus
+ 
+          (?<=bullock|donkey)
+@@ -6663,62 +6687,62 @@ ASSERTIONS
+ 
+          (?<!dogs?|cats?)
+ 
+-       causes an error at compile time. Branches that match  different  length
+-       strings  are permitted only at the top level of a lookbehind assertion.
++       causes  an  error at compile time. Branches that match different length
++       strings are permitted only at the top level of a lookbehind  assertion.
+        This is an extension compared with Perl, which requires all branches to
+        match the same length of string. An assertion such as
+ 
+          (?<=ab(c|de))
+ 
+-       is  not  permitted,  because  its single top-level branch can match two
++       is not permitted, because its single top-level  branch  can  match  two
+        different lengths, but it is acceptable to PCRE if rewritten to use two
+        top-level branches:
+ 
+          (?<=abc|abde)
+ 
+-       In  some  cases, the escape sequence \K (see above) can be used instead
++       In some cases, the escape sequence \K (see above) can be  used  instead
+        of a lookbehind assertion to get round the fixed-length restriction.
+ 
+-       The implementation of lookbehind assertions is, for  each  alternative,
+-       to  temporarily  move the current position back by the fixed length and
++       The  implementation  of lookbehind assertions is, for each alternative,
++       to temporarily move the current position back by the fixed  length  and
+        then try to match. If there are insufficient characters before the cur-
+        rent position, the assertion fails.
+ 
+-       In  a UTF mode, PCRE does not allow the \C escape (which matches a sin-
+-       gle data unit even in a UTF mode) to appear in  lookbehind  assertions,
+-       because  it  makes it impossible to calculate the length of the lookbe-
+-       hind. The \X and \R escapes, which can match different numbers of  data
++       In a UTF mode, PCRE does not allow the \C escape (which matches a  sin-
++       gle  data  unit even in a UTF mode) to appear in lookbehind assertions,
++       because it makes it impossible to calculate the length of  the  lookbe-
++       hind.  The \X and \R escapes, which can match different numbers of data
+        units, are also not permitted.
+ 
+-       "Subroutine"  calls  (see below) such as (?2) or (?&X) are permitted in
+-       lookbehinds, as long as the subpattern matches a  fixed-length  string.
++       "Subroutine" calls (see below) such as (?2) or (?&X) are  permitted  in
++       lookbehinds,  as  long as the subpattern matches a fixed-length string.
+        Recursion, however, is not supported.
+ 
+-       Possessive  quantifiers  can  be  used  in  conjunction with lookbehind
++       Possessive quantifiers can  be  used  in  conjunction  with  lookbehind
+        assertions to specify efficient matching of fixed-length strings at the
+        end of subject strings. Consider a simple pattern such as
+ 
+          abcd$
+ 
+-       when  applied  to  a  long string that does not match. Because matching
++       when applied to a long string that does  not  match.  Because  matching
+        proceeds from left to right, PCRE will look for each "a" in the subject
+-       and  then  see  if what follows matches the rest of the pattern. If the
++       and then see if what follows matches the rest of the  pattern.  If  the
+        pattern is specified as
+ 
+          ^.*abcd$
+ 
+-       the initial .* matches the entire string at first, but when this  fails
++       the  initial .* matches the entire string at first, but when this fails
+        (because there is no following "a"), it backtracks to match all but the
+-       last character, then all but the last two characters, and so  on.  Once
+-       again  the search for "a" covers the entire string, from right to left,
++       last  character,  then all but the last two characters, and so on. Once
++       again the search for "a" covers the entire string, from right to  left,
+        so we are no better off. However, if the pattern is written as
+ 
+          ^.*+(?<=abcd)
+ 
+-       there can be no backtracking for the .*+ item; it can  match  only  the
+-       entire  string.  The subsequent lookbehind assertion does a single test
+-       on the last four characters. If it fails, the match fails  immediately.
+-       For  long  strings, this approach makes a significant difference to the
++       there  can  be  no backtracking for the .*+ item; it can match only the
++       entire string. The subsequent lookbehind assertion does a  single  test
++       on  the last four characters. If it fails, the match fails immediately.
++       For long strings, this approach makes a significant difference  to  the
+        processing time.
+ 
+    Using multiple assertions
+@@ -6727,18 +6751,18 @@ ASSERTIONS
+ 
+          (?<=\d{3})(?<!999)foo
+ 
+-       matches "foo" preceded by three digits that are not "999". Notice  that
+-       each  of  the  assertions is applied independently at the same point in
+-       the subject string. First there is a  check  that  the  previous  three
+-       characters  are  all  digits,  and  then there is a check that the same
++       matches  "foo" preceded by three digits that are not "999". Notice that
++       each of the assertions is applied independently at the  same  point  in
++       the  subject  string.  First  there  is a check that the previous three
++       characters are all digits, and then there is  a  check  that  the  same
+        three characters are not "999".  This pattern does not match "foo" pre-
++       ceded by six characters, the first three of which are digits and the last
+-       three of which are not "999". For example, it  doesn't  match  "123abc-
++       ceded by six characters, the first three of which are digits and the last
++       three  of  which  are not "999". For example, it doesn't match "123abc-
+        foo". A pattern to do that is
+ 
+          (?<=\d{3}...)(?<!999)foo
+ 
+-       This  time  the  first assertion looks at the preceding six characters,
++       This time the first assertion looks at the  preceding  six  characters,
+        checking that the first three are digits, and then the second assertion
+        checks that the preceding three characters are not "999".
+ 
+@@ -6746,29 +6770,29 @@ ASSERTIONS
+ 
+          (?<=(?<!foo)bar)baz
+ 
+-       matches  an occurrence of "baz" that is preceded by "bar" which in turn
++       matches an occurrence of "baz" that is preceded by "bar" which in  turn
+        is not preceded by "foo", while
+ 
+          (?<=\d{3}(?!999)...)foo
+ 
+-       is another pattern that matches "foo" preceded by three digits and  any
++       is  another pattern that matches "foo" preceded by three digits and any
+        three characters that are not "999".
+ 
+ 
+ CONDITIONAL SUBPATTERNS
+ 
+-       It  is possible to cause the matching process to obey a subpattern con-
+-       ditionally or to choose between two alternative subpatterns,  depending
+-       on  the result of an assertion, or whether a specific capturing subpat-
+-       tern has already been matched. The two possible  forms  of  conditional
++       It is possible to cause the matching process to obey a subpattern  con-
++       ditionally  or to choose between two alternative subpatterns, depending
++       on the result of an assertion, or whether a specific capturing  subpat-
++       tern  has  already  been matched. The two possible forms of conditional
+        subpattern are:
+ 
+          (?(condition)yes-pattern)
+          (?(condition)yes-pattern|no-pattern)
+ 
+-       If  the  condition is satisfied, the yes-pattern is used; otherwise the
+-       no-pattern (if present) is used. If there are more  than  two  alterna-
+-       tives  in  the subpattern, a compile-time error occurs. Each of the two
++       If the condition is satisfied, the yes-pattern is used;  otherwise  the
++       no-pattern  (if  present)  is used. If there are more than two alterna-
++       tives in the subpattern, a compile-time error occurs. Each of  the  two
+        alternatives may itself contain nested subpatterns of any form, includ-
+        ing  conditional  subpatterns;  the  restriction  to  two  alternatives
+        applies only at the level of the condition. This pattern fragment is an
+@@ -6777,68 +6801,68 @@ CONDITIONAL SUBPATTERNS
+          (?(1) (A|B|C) | (D | (?(2)E|F) | E) )
+ 
+ 
+-       There  are  four  kinds of condition: references to subpatterns, refer-
++       There are four kinds of condition: references  to  subpatterns,  refer-
+        ences to recursion, a pseudo-condition called DEFINE, and assertions.
+ 
+    Checking for a used subpattern by number
+ 
+-       If the text between the parentheses consists of a sequence  of  digits,
++       If  the  text between the parentheses consists of a sequence of digits,
+        the condition is true if a capturing subpattern of that number has pre-
+-       viously matched. If there is more than one  capturing  subpattern  with
+-       the  same  number  (see  the earlier section about duplicate subpattern
+-       numbers), the condition is true if any of them have matched. An  alter-
+-       native  notation is to precede the digits with a plus or minus sign. In
+-       this case, the subpattern number is relative rather than absolute.  The
+-       most  recently opened parentheses can be referenced by (?(-1), the next
+-       most recent by (?(-2), and so on. Inside loops it can also  make  sense
++       viously  matched.  If  there is more than one capturing subpattern with
++       the same number (see the earlier  section  about  duplicate  subpattern
++       numbers),  the condition is true if any of them have matched. An alter-
++       native notation is to precede the digits with a plus or minus sign.  In
++       this  case, the subpattern number is relative rather than absolute. The
++       most recently opened parentheses can be referenced by (?(-1), the  next
++       most  recent  by (?(-2), and so on. Inside loops it can also make sense
+        to refer to subsequent groups. The next parentheses to be opened can be
+-       referenced as (?(+1), and so on. (The value zero in any of these  forms
++       referenced  as (?(+1), and so on. (The value zero in any of these forms
+        is not used; it provokes a compile-time error.)
+ 
+-       Consider  the  following  pattern, which contains non-significant white
++       Consider the following pattern, which  contains  non-significant  white
+        space to make it more readable (assume the PCRE_EXTENDED option) and to
+        divide it into three parts for ease of discussion:
+ 
+          ( \( )?    [^()]+    (?(1) \) )
+ 
+-       The  first  part  matches  an optional opening parenthesis, and if that
++       The first part matches an optional opening  parenthesis,  and  if  that
+        character is present, sets it as the first captured substring. The sec-
+-       ond  part  matches one or more characters that are not parentheses. The
+-       third part is a conditional subpattern that tests whether  or  not  the
+-       first  set  of  parentheses  matched.  If they did, that is, if subject
+-       started with an opening parenthesis, the condition is true, and so  the
+-       yes-pattern  is  executed and a closing parenthesis is required. Other-
+-       wise, since no-pattern is not present, the subpattern matches  nothing.
+-       In  other  words,  this  pattern matches a sequence of non-parentheses,
++       ond part matches one or more characters that are not  parentheses.  The
++       third  part  is  a conditional subpattern that tests whether or not the
++       first set of parentheses matched. If they did, that is, if the  subject
++       started  with an opening parenthesis, the condition is true, and so the
++       yes-pattern is executed and a closing parenthesis is  required.  Other-
++       wise,  since no-pattern is not present, the subpattern matches nothing.
++       In other words, this pattern matches  a  sequence  of  non-parentheses,
+        optionally enclosed in parentheses.
+ 
+-       If you were embedding this pattern in a larger one,  you  could  use  a
++       If  you  were  embedding  this pattern in a larger one, you could use a
+        relative reference:
+ 
+          ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...
+ 
+-       This  makes  the  fragment independent of the parentheses in the larger
++       This makes the fragment independent of the parentheses  in  the  larger
+        pattern.
+ 
+    Checking for a used subpattern by name
+ 
+-       Perl uses the syntax (?(<name>)...) or (?('name')...)  to  test  for  a
+-       used  subpattern  by  name.  For compatibility with earlier versions of
+-       PCRE, which had this facility before Perl, the syntax  (?(name)...)  is
++       Perl  uses  the  syntax  (?(<name>)...) or (?('name')...) to test for a
++       used subpattern by name. For compatibility  with  earlier  versions  of
++       PCRE,  which  had this facility before Perl, the syntax (?(name)...) is
+        also recognized.
+ 
+        Rewriting the above example to use a named subpattern gives this:
+ 
+          (?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )
+ 
+-       If  the  name used in a condition of this kind is a duplicate, the test
+-       is applied to all subpatterns of the same name, and is true if any  one
++       If the name used in a condition of this kind is a duplicate,  the  test
++       is  applied to all subpatterns of the same name, and is true if any one
+        of them has matched.
+ 
+    Checking for pattern recursion
+ 
+        If the condition is the string (R), and there is no subpattern with the
+-       name R, the condition is true if a recursive call to the whole  pattern
++       name  R, the condition is true if a recursive call to the whole pattern
+        or any subpattern has been made. If digits or a name preceded by amper-
+        sand follow the letter R, for example:
+ 
+@@ -6846,51 +6870,51 @@ CONDITIONAL SUBPATTERNS
+ 
+        the condition is true if the most recent recursion is into a subpattern
+        whose number or name is given. This condition does not check the entire
+-       recursion stack. If the name used in a condition  of  this  kind  is  a
++       recursion  stack.  If  the  name  used in a condition of this kind is a
+        duplicate, the test is applied to all subpatterns of the same name, and
+        is true if any one of them is the most recent recursion.
+ 
+-       At "top level", all these recursion test  conditions  are  false.   The
++       At  "top  level",  all  these recursion test conditions are false.  The
+        syntax for recursive patterns is described below.
+ 
+    Defining subpatterns for use by reference only
+ 
+-       If  the  condition  is  the string (DEFINE), and there is no subpattern
+-       with the name DEFINE, the condition is  always  false.  In  this  case,
+-       there  may  be  only  one  alternative  in the subpattern. It is always
+-       skipped if control reaches this point  in  the  pattern;  the  idea  of
+-       DEFINE  is that it can be used to define subroutines that can be refer-
+-       enced from elsewhere. (The use of subroutines is described below.)  For
+-       example,  a  pattern  to match an IPv4 address such as "192.168.23.245"
++       If the condition is the string (DEFINE), and  there  is  no  subpattern
++       with  the  name  DEFINE,  the  condition is always false. In this case,
++       there may be only one alternative  in  the  subpattern.  It  is  always
++       skipped  if  control  reaches  this  point  in the pattern; the idea of
++       DEFINE is that it can be used to define subroutines that can be  refer-
++       enced  from elsewhere. (The use of subroutines is described below.) For
++       example, a pattern to match an IPv4 address  such  as  "192.168.23.245"
+        could be written like this (ignore white space and line breaks):
+ 
+          (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
+          \b (?&byte) (\.(?&byte)){3} \b
+ 
+-       The first part of the pattern is a DEFINE group inside which a  another
+-       group  named "byte" is defined. This matches an individual component of
+-       an IPv4 address (a number less than 256). When  matching  takes  place,
+-       this  part  of  the pattern is skipped because DEFINE acts like a false
+-       condition. The rest of the pattern uses references to the  named  group
+-       to  match the four dot-separated components of an IPv4 address, insist-
++       The  first  part  of the pattern is a DEFINE group inside which another
++       group named "byte" is defined. This matches an individual component  of
++       an  IPv4  address  (a number less than 256). When matching takes place,
++       this part of the pattern is skipped because DEFINE acts  like  a  false
++       condition.  The  rest of the pattern uses references to the named group
++       to match the four dot-separated components of an IPv4 address,  insist-
+        ing on a word boundary at each end.
+ 
+    Assertion conditions
+ 
+-       If the condition is not in any of the above  formats,  it  must  be  an
+-       assertion.   This may be a positive or negative lookahead or lookbehind
+-       assertion. Consider  this  pattern,  again  containing  non-significant
++       If  the  condition  is  not  in any of the above formats, it must be an
++       assertion.  This may be a positive or negative lookahead or  lookbehind
++       assertion.  Consider  this  pattern,  again  containing non-significant
+        white space, and with the two alternatives on the second line:
+ 
+          (?(?=[^a-z]*[a-z])
+          \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
+ 
+-       The  condition  is  a  positive  lookahead  assertion  that  matches an
+-       optional sequence of non-letters followed by a letter. In other  words,
+-       it  tests  for the presence of at least one letter in the subject. If a
+-       letter is found, the subject is matched against the first  alternative;
+-       otherwise  it  is  matched  against  the  second.  This pattern matches
+-       strings in one of the two forms dd-aaa-dd or dd-dd-dd,  where  aaa  are
++       The condition  is  a  positive  lookahead  assertion  that  matches  an
++       optional  sequence of non-letters followed by a letter. In other words,
++       it tests for the presence of at least one letter in the subject.  If  a
++       letter  is found, the subject is matched against the first alternative;
++       otherwise it is  matched  against  the  second.  This  pattern  matches
++       strings  in  one  of the two forms dd-aaa-dd or dd-dd-dd, where aaa are
+        letters and dd are digits.
+ 
+ 
+@@ -6899,41 +6923,41 @@ COMMENTS
+        There are two ways of including comments in patterns that are processed
+        by PCRE. In both cases, the start of the comment must not be in a char-
+        acter class, nor in the middle of any other sequence of related charac-
+-       ters such as (?: or a subpattern name or number.  The  characters  that
++       ters  such  as  (?: or a subpattern name or number. The characters that
+        make up a comment play no part in the pattern matching.
+ 
+-       The  sequence (?# marks the start of a comment that continues up to the
+-       next closing parenthesis. Nested parentheses are not permitted. If  the
++       The sequence (?# marks the start of a comment that continues up to  the
++       next  closing parenthesis. Nested parentheses are not permitted. If the
+        PCRE_EXTENDED option is set, an unescaped # character also introduces a
+-       comment, which in this case continues to  immediately  after  the  next
+-       newline  character  or character sequence in the pattern. Which charac-
++       comment,  which  in  this  case continues to immediately after the next
++       newline character or character sequence in the pattern.  Which  charac-
+        ters are interpreted as newlines is controlled by the options passed to
+-       a  compiling function or by a special sequence at the start of the pat-
++       a compiling function or by a special sequence at the start of the  pat-
+        tern, as described in the section entitled "Newline conventions" above.
+        Note that the end of this type of comment is a literal newline sequence
+-       in the pattern; escape sequences that happen to represent a newline  do
+-       not  count.  For  example,  consider this pattern when PCRE_EXTENDED is
++       in  the pattern; escape sequences that happen to represent a newline do
++       not count. For example, consider this  pattern  when  PCRE_EXTENDED  is
+        set, and the default newline convention is in force:
+ 
+          abc #comment \n still comment
+ 
+-       On encountering the # character, pcre_compile()  skips  along,  looking
+-       for  a newline in the pattern. The sequence \n is still literal at this
+-       stage, so it does not terminate the comment. Only an  actual  character
++       On  encountering  the  # character, pcre_compile() skips along, looking
++       for a newline in the pattern. The sequence \n is still literal at  this
++       stage,  so  it does not terminate the comment. Only an actual character
+        with the code value 0x0a (the default newline) does so.
+ 
+ 
+ RECURSIVE PATTERNS
+ 
+-       Consider  the problem of matching a string in parentheses, allowing for
+-       unlimited nested parentheses. Without the use of  recursion,  the  best
+-       that  can  be  done  is  to use a pattern that matches up to some fixed
+-       depth of nesting. It is not possible to  handle  an  arbitrary  nesting
++       Consider the problem of matching a string in parentheses, allowing  for
++       unlimited  nested  parentheses.  Without the use of recursion, the best
++       that can be done is to use a pattern that  matches  up  to  some  fixed
++       depth  of  nesting.  It  is not possible to handle an arbitrary nesting
+        depth.
+ 
+        For some time, Perl has provided a facility that allows regular expres-
+-       sions to recurse (amongst other things). It does this by  interpolating
+-       Perl  code in the expression at run time, and the code can refer to the
++       sions  to recurse (amongst other things). It does this by interpolating
++       Perl code in the expression at run time, and the code can refer to  the
+        expression itself. A Perl pattern using code interpolation to solve the
+        parentheses problem can be created like this:
+ 
+@@ -6943,201 +6967,201 @@ RECURSIVE PATTERNS
+        refers recursively to the pattern in which it appears.
+ 
+        Obviously, PCRE cannot support the interpolation of Perl code. Instead,
+-       it  supports  special  syntax  for recursion of the entire pattern, and
+-       also for individual subpattern recursion.  After  its  introduction  in
+-       PCRE  and  Python,  this  kind of recursion was subsequently introduced
++       it supports special syntax for recursion of  the  entire  pattern,  and
++       also  for  individual  subpattern  recursion. After its introduction in
++       PCRE and Python, this kind of  recursion  was  subsequently  introduced
+        into Perl at release 5.10.
+ 
+-       A special item that consists of (? followed by a  number  greater  than
+-       zero  and  a  closing parenthesis is a recursive subroutine call of the
+-       subpattern of the given number, provided that  it  occurs  inside  that
+-       subpattern.  (If  not,  it is a non-recursive subroutine call, which is
+-       described in the next section.) The special item  (?R)  or  (?0)  is  a
++       A  special  item  that consists of (? followed by a number greater than
++       zero and a closing parenthesis is a recursive subroutine  call  of  the
++       subpattern  of  the  given  number, provided that it occurs inside that
++       subpattern. (If not, it is a non-recursive subroutine  call,  which  is
++       described  in  the  next  section.)  The special item (?R) or (?0) is a
+        recursive call of the entire regular expression.
+ 
+-       This  PCRE  pattern  solves  the nested parentheses problem (assume the
++       This PCRE pattern solves the nested  parentheses  problem  (assume  the
+        PCRE_EXTENDED option is set so that white space is ignored):
+ 
+          \( ( [^()]++ | (?R) )* \)
+ 
+-       First it matches an opening parenthesis. Then it matches any number  of
+-       substrings  which  can  either  be  a sequence of non-parentheses, or a
+-       recursive match of the pattern itself (that is, a  correctly  parenthe-
++       First  it matches an opening parenthesis. Then it matches any number of
++       substrings which can either be a  sequence  of  non-parentheses,  or  a
++       recursive  match  of the pattern itself (that is, a correctly parenthe-
+        sized substring).  Finally there is a closing parenthesis. Note the use
+        of a possessive quantifier to avoid backtracking into sequences of non-
+        parentheses.
+ 
+-       If  this  were  part of a larger pattern, you would not want to recurse
++       If this were part of a larger pattern, you would not  want  to  recurse
+        the entire pattern, so instead you could use this:
+ 
+          ( \( ( [^()]++ | (?1) )* \) )
+ 
+-       We have put the pattern into parentheses, and caused the  recursion  to
++       We  have  put the pattern into parentheses, and caused the recursion to
+        refer to them instead of the whole pattern.
+ 
+-       In  a  larger  pattern,  keeping  track  of  parenthesis numbers can be
+-       tricky. This is made easier by the use of relative references.  Instead
++       In a larger pattern,  keeping  track  of  parenthesis  numbers  can  be
++       tricky.  This is made easier by the use of relative references. Instead
+        of (?1) in the pattern above you can write (?-2) to refer to the second
+-       most recently opened parentheses  preceding  the  recursion.  In  other
+-       words,  a  negative  number counts capturing parentheses leftwards from
++       most  recently  opened  parentheses  preceding  the recursion. In other
++       words, a negative number counts capturing  parentheses  leftwards  from
+        the point at which it is encountered.
+ 
+-       It is also possible to refer to  subsequently  opened  parentheses,  by
+-       writing  references  such  as (?+2). However, these cannot be recursive
+-       because the reference is not inside the  parentheses  that  are  refer-
+-       enced.  They are always non-recursive subroutine calls, as described in
++       It  is  also  possible  to refer to subsequently opened parentheses, by
++       writing references such as (?+2). However, these  cannot  be  recursive
++       because  the  reference  is  not inside the parentheses that are refer-
++       enced. They are always non-recursive subroutine calls, as described  in
+        the next section.
+ 
+-       An alternative approach is to use named parentheses instead.  The  Perl
+-       syntax  for  this  is (?&name); PCRE's earlier syntax (?P>name) is also
++       An  alternative  approach is to use named parentheses instead. The Perl
++       syntax for this is (?&name); PCRE's earlier syntax  (?P>name)  is  also
+        supported. We could rewrite the above example as follows:
+ 
+          (?<pn> \( ( [^()]++ | (?&pn) )* \) )
+ 
+-       If there is more than one subpattern with the same name,  the  earliest
++       If  there  is more than one subpattern with the same name, the earliest
+        one is used.
+ 
+-       This  particular  example pattern that we have been looking at contains
++       This particular example pattern that we have been looking  at  contains
+        nested unlimited repeats, and so the use of a possessive quantifier for
+        matching strings of non-parentheses is important when applying the pat-
+-       tern to strings that do not match. For example, when  this  pattern  is
++       tern  to  strings  that do not match. For example, when this pattern is
+        applied to
+ 
+          (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
+ 
+-       it  yields  "no  match" quickly. However, if a possessive quantifier is
+-       not used, the match runs for a very long time indeed because there  are
+-       so  many  different  ways the + and * repeats can carve up the subject,
++       it yields "no match" quickly. However, if a  possessive  quantifier  is
++       not  used, the match runs for a very long time indeed because there are
++       so many different ways the + and * repeats can carve  up  the  subject,
+        and all have to be tested before failure can be reported.
+ 
+-       At the end of a match, the values of capturing  parentheses  are  those
+-       from  the outermost level. If you want to obtain intermediate values, a
+-       callout function can be used (see below and the pcrecallout  documenta-
++       At  the  end  of a match, the values of capturing parentheses are those
++       from the outermost level. If you want to obtain intermediate values,  a
++       callout  function can be used (see below and the pcrecallout documenta-
+        tion). If the pattern above is matched against
+ 
+          (ab(cd)ef)
+ 
+-       the  value  for  the  inner capturing parentheses (numbered 2) is "ef",
+-       which is the last value taken on at the top level. If a capturing  sub-
+-       pattern  is  not  matched at the top level, its final captured value is
+-       unset, even if it was (temporarily) set at a deeper  level  during  the
++       the value for the inner capturing parentheses  (numbered  2)  is  "ef",
++       which  is the last value taken on at the top level. If a capturing sub-
++       pattern is not matched at the top level, its final  captured  value  is
++       unset,  even  if  it was (temporarily) set at a deeper level during the
+        matching process.
+ 
+-       If  there are more than 15 capturing parentheses in a pattern, PCRE has
+-       to obtain extra memory to store data during a recursion, which it  does
++       If there are more than 15 capturing parentheses in a pattern, PCRE  has
++       to  obtain extra memory to store data during a recursion, which it does
+        by using pcre_malloc, freeing it via pcre_free afterwards. If no memory
+        can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
+ 
+-       Do not confuse the (?R) item with the condition (R),  which  tests  for
+-       recursion.   Consider  this pattern, which matches text in angle brack-
+-       ets, allowing for arbitrary nesting. Only digits are allowed in  nested
+-       brackets  (that is, when recursing), whereas any characters are permit-
++       Do  not  confuse  the (?R) item with the condition (R), which tests for
++       recursion.  Consider this pattern, which matches text in  angle  brack-
++       ets,  allowing for arbitrary nesting. Only digits are allowed in nested
++       brackets (that is, when recursing), whereas any characters are  permit-
+        ted at the outer level.
+ 
+          < (?: (?(R) \d++  | [^<>]*+) | (?R)) * >
+ 
+-       In this pattern, (?(R) is the start of a conditional  subpattern,  with
+-       two  different  alternatives for the recursive and non-recursive cases.
++       In  this  pattern, (?(R) is the start of a conditional subpattern, with
++       two different alternatives for the recursive and  non-recursive  cases.
+        The (?R) item is the actual recursive call.
+ 
+    Differences in recursion processing between PCRE and Perl
+ 
+-       Recursion processing in PCRE differs from Perl in two  important  ways.
+-       In  PCRE (like Python, but unlike Perl), a recursive subpattern call is
++       Recursion  processing  in PCRE differs from Perl in two important ways.
++       In PCRE (like Python, but unlike Perl), a recursive subpattern call  is
+        always treated as an atomic group. That is, once it has matched some of
+        the subject string, it is never re-entered, even if it contains untried
+-       alternatives and there is a subsequent matching failure.  This  can  be
+-       illustrated  by the following pattern, which purports to match a palin-
+-       dromic string that contains an odd number of characters  (for  example,
++       alternatives  and  there  is a subsequent matching failure. This can be
++       illustrated by the following pattern, which purports to match a  palin-
++       dromic  string  that contains an odd number of characters (for example,
+        "a", "aba", "abcba", "abcdcba"):
+ 
+          ^(.|(.)(?1)\2)$
+ 
+        The idea is that it either matches a single character, or two identical
+-       characters surrounding a sub-palindrome. In Perl, this  pattern  works;
+-       in  PCRE  it  does  not if the pattern is longer than three characters.
++       characters  surrounding  a sub-palindrome. In Perl, this pattern works;
++       in PCRE it does not if the subject is  longer  than  three  characters.
+        Consider the subject string "abcba":
+ 
+-       At the top level, the first character is matched, but as it is  not  at
++       At  the  top level, the first character is matched, but as it is not at
+        the end of the string, the first alternative fails; the second alterna-
+        tive is taken and the recursion kicks in. The recursive call to subpat-
+-       tern  1  successfully  matches the next character ("b"). (Note that the
++       tern 1 successfully matches the next character ("b").  (Note  that  the
+        beginning and end of line tests are not part of the recursion).
+ 
+-       Back at the top level, the next character ("c") is compared  with  what
+-       subpattern  2 matched, which was "a". This fails. Because the recursion
+-       is treated as an atomic group, there are now  no  backtracking  points,
+-       and  so  the  entire  match fails. (Perl is able, at this point, to re-
+-       enter the recursion and try the second alternative.)  However,  if  the
++       Back  at  the top level, the next character ("c") is compared with what
++       subpattern 2 matched, which was "a". This fails. Because the  recursion
++       is  treated  as  an atomic group, there are now no backtracking points,
++       and so the entire match fails. (Perl is able, at  this  point,  to  re-
++       enter  the  recursion  and try the second alternative.) However, if the
+        pattern is written with the alternatives in the other order, things are
+        different:
+ 
+          ^((.)(?1)\2|.)$
+ 
+-       This time, the recursing alternative is tried first, and  continues  to
+-       recurse  until  it runs out of characters, at which point the recursion
+-       fails. But this time we do have  another  alternative  to  try  at  the
+-       higher  level.  That  is  the  big difference: in the previous case the
++       This  time,  the recursing alternative is tried first, and continues to
++       recurse until it runs out of characters, at which point  the  recursion
++       fails.  But  this  time  we  do  have another alternative to try at the
++       higher level. That is the big difference:  in  the  previous  case  the
+        remaining alternative is at a deeper recursion level, which PCRE cannot
+        use.
+ 
+-       To  change  the pattern so that it matches all palindromic strings, not
+-       just those with an odd number of characters, it is tempting  to  change
++       To change the pattern so that it matches all palindromic  strings,  not
++       just  those  with an odd number of characters, it is tempting to change
+        the pattern to this:
+ 
+          ^((.)(?1)\2|.?)$
+ 
+-       Again,  this  works  in Perl, but not in PCRE, and for the same reason.
+-       When a deeper recursion has matched a single character,  it  cannot  be
+-       entered  again  in  order  to match an empty string. The solution is to
+-       separate the two cases, and write out the odd and even cases as  alter-
++       Again, this works in Perl, but not in PCRE, and for  the  same  reason.
++       When  a  deeper  recursion has matched a single character, it cannot be
++       entered again in order to match an empty string.  The  solution  is  to
++       separate  the two cases, and write out the odd and even cases as alter-
+        natives at the higher level:
+ 
+          ^(?:((.)(?1)\2|)|((.)(?3)\4|.))
+ 
+-       If  you  want  to match typical palindromic phrases, the pattern has to
++       If you want to match typical palindromic phrases, the  pattern  has  to
+        ignore all non-word characters, which can be done like this:
+ 
+          ^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$
+ 
+        If run with the PCRE_CASELESS option, this pattern matches phrases such
+        as "A man, a plan, a canal: Panama!" and it works well in both PCRE and
+-       Perl. Note the use of the possessive quantifier *+ to avoid  backtrack-
+-       ing  into  sequences of non-word characters. Without this, PCRE takes a
+-       great deal longer (ten times or more) to  match  typical  phrases,  and
++       Perl.  Note the use of the possessive quantifier *+ to avoid backtrack-
++       ing into sequences of non-word characters. Without this, PCRE  takes  a
++       great  deal  longer  (ten  times or more) to match typical phrases, and
+        Perl takes so long that you think it has gone into a loop.
+ 
+-       WARNING:  The  palindrome-matching patterns above work only if the sub-
+-       ject string does not start with a palindrome that is shorter  than  the
+-       entire  string.  For example, although "abcba" is correctly matched, if
+-       the subject is "ababa", PCRE finds the palindrome "aba" at  the  start,
+-       then  fails at top level because the end of the string does not follow.
+-       Once again, it cannot jump back into the recursion to try other  alter-
++       WARNING: The palindrome-matching patterns above work only if  the  sub-
++       ject  string  does not start with a palindrome that is shorter than the
++       entire string.  For example, although "abcba" is correctly matched,  if
++       the  subject  is "ababa", PCRE finds the palindrome "aba" at the start,
++       then fails at top level because the end of the string does not  follow.
++       Once  again, it cannot jump back into the recursion to try other alter-
+        natives, so the entire match fails.
+ 
+-       The  second  way  in which PCRE and Perl differ in their recursion pro-
+-       cessing is in the handling of captured values. In Perl, when a  subpat-
+-       tern  is  called recursively or as a subpattern (see the next section),
+-       it has no access to any values that were captured  outside  the  recur-
+-       sion,  whereas  in  PCRE  these values can be referenced. Consider this
++       The second way in which PCRE and Perl differ in  their  recursion  pro-
++       cessing  is in the handling of captured values. In Perl, when a subpat-
++       tern is called recursively or as a subpattern (see the  next  section),
++       it  has  no  access to any values that were captured outside the recur-
++       sion, whereas in PCRE these values can  be  referenced.  Consider  this
+        pattern:
+ 
+          ^(.)(\1|a(?2))
+ 
+-       In PCRE, this pattern matches "bab". The  first  capturing  parentheses
+-       match  "b",  then in the second group, when the back reference \1 fails
+-       to match "b", the second alternative matches "a" and then recurses.  In
+-       the  recursion,  \1 does now match "b" and so the whole match succeeds.
+-       In Perl, the pattern fails to match because inside the  recursive  call
++       In  PCRE,  this  pattern matches "bab". The first capturing parentheses
++       match "b", then in the second group, when the back reference  \1  fails
++       to  match "b", the second alternative matches "a" and then recurses. In
++       the recursion, \1 does now match "b" and so the whole  match  succeeds.
++       In  Perl,  the pattern fails to match because inside the recursive call
+        \1 cannot access the externally set value.
+ 
+ 
+ SUBPATTERNS AS SUBROUTINES
+ 
+-       If  the  syntax for a recursive subpattern call (either by number or by
+-       name) is used outside the parentheses to which it refers,  it  operates
+-       like  a subroutine in a programming language. The called subpattern may
+-       be defined before or after the reference. A numbered reference  can  be
++       If the syntax for a recursive subpattern call (either by number  or  by
++       name)  is  used outside the parentheses to which it refers, it operates
++       like a subroutine in a programming language. The called subpattern  may
++       be  defined  before or after the reference. A numbered reference can be
+        absolute or relative, as in these examples:
+ 
+          (...(absolute)...)...(?2)...
+@@ -7148,79 +7172,79 @@ SUBPATTERNS AS SUBROUTINES
+ 
+          (sens|respons)e and \1ibility
+ 
+-       matches  "sense and sensibility" and "response and responsibility", but
++       matches "sense and sensibility" and "response and responsibility",  but
+        not "sense and responsibility". If instead the pattern
+ 
+          (sens|respons)e and (?1)ibility
+ 
+-       is used, it does match "sense and responsibility" as well as the  other
+-       two  strings.  Another  example  is  given  in the discussion of DEFINE
++       is  used, it does match "sense and responsibility" as well as the other
++       two strings. Another example is  given  in  the  discussion  of  DEFINE
+        above.
+ 
+-       All subroutine calls, whether recursive or not, are always  treated  as
+-       atomic  groups. That is, once a subroutine has matched some of the sub-
++       All  subroutine  calls, whether recursive or not, are always treated as
++       atomic groups. That is, once a subroutine has matched some of the  sub-
+        ject string, it is never re-entered, even if it contains untried alter-
+-       natives  and  there  is  a  subsequent  matching failure. Any capturing
+-       parentheses that are set during the subroutine  call  revert  to  their
++       natives and there is  a  subsequent  matching  failure.  Any  capturing
++       parentheses  that  are  set  during the subroutine call revert to their
+        previous values afterwards.
+ 
+-       Processing  options  such as case-independence are fixed when a subpat-
+-       tern is defined, so if it is used as a subroutine, such options  cannot
++       Processing options such as case-independence are fixed when  a  subpat-
++       tern  is defined, so if it is used as a subroutine, such options cannot
+        be changed for different calls. For example, consider this pattern:
+ 
+          (abc)(?i:(?-1))
+ 
+-       It  matches  "abcabc". It does not match "abcABC" because the change of
++       It matches "abcabc". It does not match "abcABC" because the  change  of
+        processing option does not affect the called subpattern.
+ 
+ 
+ ONIGURUMA SUBROUTINE SYNTAX
+ 
+-       For compatibility with Oniguruma, the non-Perl syntax \g followed by  a
++       For  compatibility with Oniguruma, the non-Perl syntax \g followed by a
+        name or a number enclosed either in angle brackets or single quotes, is
+-       an alternative syntax for referencing a  subpattern  as  a  subroutine,
+-       possibly  recursively. Here are two of the examples used above, rewrit-
++       an  alternative  syntax  for  referencing a subpattern as a subroutine,
++       possibly recursively. Here are two of the examples used above,  rewrit-
+        ten using this syntax:
+ 
+          (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
+          (sens|respons)e and \g'1'ibility
+ 
+-       PCRE supports an extension to Oniguruma: if a number is preceded  by  a
++       PCRE  supports  an extension to Oniguruma: if a number is preceded by a
+        plus or a minus sign it is taken as a relative reference. For example:
+ 
+          (abc)(?i:\g<-1>)
+ 
+-       Note  that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are not
+-       synonymous. The former is a back reference; the latter is a  subroutine
++       Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are  not
++       synonymous.  The former is a back reference; the latter is a subroutine
+        call.
+ 
+ 
+ CALLOUTS
+ 
+        Perl has a feature whereby using the sequence (?{...}) causes arbitrary
+-       Perl code to be obeyed in the middle of matching a regular  expression.
++       Perl  code to be obeyed in the middle of matching a regular expression.
+        This makes it possible, amongst other things, to extract different sub-
+        strings that match the same pair of parentheses when there is a repeti-
+        tion.
+ 
+        PCRE provides a similar feature, but of course it cannot obey arbitrary
+        Perl code. The feature is called "callout". The caller of PCRE provides
+-       an  external function by putting its entry point in the global variable
+-       pcre_callout (8-bit library) or pcre[16|32]_callout (16-bit  or  32-bit
+-       library).   By default, this variable contains NULL, which disables all
++       an external function by putting its entry point in the global  variable
++       pcre_callout  (8-bit  library) or pcre[16|32]_callout (16-bit or 32-bit
++       library).  By default, this variable contains NULL, which disables  all
+        calling out.
+ 
+-       Within a regular expression, (?C) indicates the  points  at  which  the
+-       external  function  is  to be called. If you want to identify different
+-       callout points, you can put a number less than 256 after the letter  C.
+-       The  default  value is zero.  For example, this pattern has two callout
++       Within  a  regular  expression,  (?C) indicates the points at which the
++       external function is to be called. If you want  to  identify  different
++       callout  points, you can put a number less than 256 after the letter C.
++       The default value is zero.  For example, this pattern has  two  callout
+        points:
+ 
+          (?C1)abc(?C2)def
+ 
+-       If the PCRE_AUTO_CALLOUT flag is passed to a compiling function,  call-
+-       outs  are automatically installed before each item in the pattern. They
+-       are all numbered 255. If there is a conditional group  in  the  pattern
++       If  the PCRE_AUTO_CALLOUT flag is passed to a compiling function, call-
++       outs are automatically installed before each item in the pattern.  They
++       are  all  numbered  255. If there is a conditional group in the pattern
+        whose condition is an assertion, an additional callout is inserted just
+        before the condition. An explicit callout may also be set at this posi-
+        tion, as in this example:
+@@ -7230,120 +7254,120 @@ CALLOUTS
+        Note that this applies only to assertion conditions, not to other types
+        of condition.
+ 
+-       During matching, when PCRE reaches a callout point, the external  func-
+-       tion  is  called.  It  is  provided with the number of the callout, the
+-       position in the pattern, and, optionally, one item of  data  originally
+-       supplied  by  the caller of the matching function. The callout function
++       During  matching, when PCRE reaches a callout point, the external func-
++       tion is called. It is provided with the  number  of  the  callout,  the
++       position  in  the pattern, and, optionally, one item of data originally
++       supplied by the caller of the matching function. The  callout  function
+        may cause matching to proceed, to backtrack, or to fail altogether.
+ 
+-       By default, PCRE implements a number of optimizations at  compile  time
+-       and  matching  time, and one side-effect is that sometimes callouts are
+-       skipped. If you need all possible callouts to happen, you need  to  set
+-       options  that  disable  the relevant optimizations. More details, and a
+-       complete description of the interface  to  the  callout  function,  are
++       By  default,  PCRE implements a number of optimizations at compile time
++       and matching time, and one side-effect is that sometimes  callouts  are
++       skipped.  If  you need all possible callouts to happen, you need to set
++       options that disable the relevant optimizations. More  details,  and  a
++       complete  description  of  the  interface  to the callout function, are
+        given in the pcrecallout documentation.
+ 
+ 
+ BACKTRACKING CONTROL
+ 
+-       Perl  5.10 introduced a number of "Special Backtracking Control Verbs",
+-       which are still described in the Perl  documentation  as  "experimental
+-       and  subject to change or removal in a future version of Perl". It goes
+-       on to say: "Their usage in production code should  be  noted  to  avoid
+-       problems  during upgrades." The same remarks apply to the PCRE features
++       Perl 5.10 introduced a number of "Special Backtracking Control  Verbs",
++       which  are  still  described in the Perl documentation as "experimental
++       and subject to change or removal in a future version of Perl". It  goes
++       on  to  say:  "Their  usage in production code should be noted to avoid
++       problems during upgrades." The same remarks apply to the PCRE  features
+        described in this section.
+ 
+-       The new verbs make use of what was previously invalid syntax: an  open-
++       The  new verbs make use of what was previously invalid syntax: an open-
+        ing parenthesis followed by an asterisk. They are generally of the form
+-       (*VERB) or (*VERB:NAME). Some may take either form,  possibly  behaving
+-       differently  depending  on  whether or not a name is present. A name is
++       (*VERB)  or  (*VERB:NAME). Some may take either form, possibly behaving
++       differently depending on whether or not a name is present.  A  name  is
+        any sequence of characters that does not include a closing parenthesis.
+        The maximum length of name is 255 in the 8-bit library and 65535 in the
+-       16-bit and 32-bit libraries. If the name is  empty,  that  is,  if  the
+-       closing  parenthesis immediately follows the colon, the effect is as if
+-       the colon were not there.  Any number of these verbs  may  occur  in  a
++       16-bit  and  32-bit  libraries.  If  the name is empty, that is, if the
++       closing parenthesis immediately follows the colon, the effect is as  if
++       the  colon  were  not  there.  Any number of these verbs may occur in a
+        pattern.
+ 
+-       Since  these  verbs  are  specifically related to backtracking, most of
+-       them can be used only when the pattern is to be matched  using  one  of
+-       the  traditional  matching  functions, because these use a backtracking
+-       algorithm. With the exception of (*FAIL), which behaves like a  failing
+-       negative  assertion,  the  backtracking control verbs cause an error if
++       Since these verbs are specifically related  to  backtracking,  most  of
++       them  can  be  used only when the pattern is to be matched using one of
++       the traditional matching functions, because these  use  a  backtracking
++       algorithm.  With the exception of (*FAIL), which behaves like a failing
++       negative assertion, the backtracking control verbs cause  an  error  if
+        encountered by a DFA matching function.
+ 
+-       The behaviour of these verbs in repeated  groups,  assertions,  and  in
++       The  behaviour  of  these  verbs in repeated groups, assertions, and in
+        subpatterns called as subroutines (whether or not recursively) is docu-
+        mented below.
+ 
+    Optimizations that affect backtracking verbs
+ 
+-       PCRE contains some optimizations that are used to speed up matching  by
++       PCRE  contains some optimizations that are used to speed up matching by
+        running some checks at the start of each match attempt. For example, it
+-       may know the minimum length of matching subject, or that  a  particular
++       may  know  the minimum length of matching subject, or that a particular
+        character must be present. When one of these optimizations bypasses the
+-       running of a match,  any  included  backtracking  verbs  will  not,  of
++       running  of  a  match,  any  included  backtracking  verbs will not, of
+        course, be processed. You can suppress the start-of-match optimizations
+-       by setting the PCRE_NO_START_OPTIMIZE  option  when  calling  pcre_com-
++       by  setting  the  PCRE_NO_START_OPTIMIZE  option when calling pcre_com-
+        pile() or pcre_exec(), or by starting the pattern with (*NO_START_OPT).
+        There is more discussion of this option in the section entitled "Option
+        bits for pcre_exec()" in the pcreapi documentation.
+ 
+-       Experiments  with  Perl  suggest that it too has similar optimizations,
++       Experiments with Perl suggest that it too  has  similar  optimizations,
+        sometimes leading to anomalous results.
+ 
+    Verbs that act immediately
+ 
+-       The following verbs act as soon as they are encountered. They  may  not
++       The  following  verbs act as soon as they are encountered. They may not
+        be followed by a name.
+ 
+           (*ACCEPT)
+ 
+-       This  verb causes the match to end successfully, skipping the remainder
+-       of the pattern. However, when it is inside a subpattern that is  called
+-       as  a  subroutine, only that subpattern is ended successfully. Matching
++       This verb causes the match to end successfully, skipping the  remainder
++       of  the pattern. However, when it is inside a subpattern that is called
++       as a subroutine, only that subpattern is ended  successfully.  Matching
+        then continues at the outer level. If (*ACCEPT) in triggered in a posi-
+-       tive  assertion,  the  assertion succeeds; in a negative assertion, the
++       tive assertion, the assertion succeeds; in a  negative  assertion,  the
+        assertion fails.
+ 
+-       If (*ACCEPT) is inside capturing parentheses, the data so far  is  cap-
++       If  (*ACCEPT)  is inside capturing parentheses, the data so far is cap-
+        tured. For example:
+ 
+          A((?:A|B(*ACCEPT)|C)D)
+ 
+-       This  matches  "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
++       This matches "AB", "AAD", or "ACD"; when it matches "AB", "B"  is  cap-
+        tured by the outer parentheses.
+ 
+          (*FAIL) or (*F)
+ 
+-       This verb causes a matching failure, forcing backtracking to occur.  It
+-       is  equivalent to (?!) but easier to read. The Perl documentation notes
+-       that it is probably useful only when combined  with  (?{})  or  (??{}).
+-       Those  are,  of course, Perl features that are not present in PCRE. The
+-       nearest equivalent is the callout feature, as for example in this  pat-
++       This  verb causes a matching failure, forcing backtracking to occur. It
++       is equivalent to (?!) but easier to read. The Perl documentation  notes
++       that  it  is  probably  useful only when combined with (?{}) or (??{}).
++       Those are, of course, Perl features that are not present in  PCRE.  The
++       nearest  equivalent is the callout feature, as for example in this pat-
+        tern:
+ 
+          a+(?C)(*FAIL)
+ 
+-       A  match  with the string "aaaa" always fails, but the callout is taken
++       A match with the string "aaaa" always fails, but the callout  is  taken
+        before each backtrack happens (in this example, 10 times).
+ 
+    Recording which path was taken
+ 
+-       There is one verb whose main purpose  is  to  track  how  a  match  was
+-       arrived  at,  though  it  also  has a secondary use in conjunction with
++       There  is  one  verb  whose  main  purpose  is to track how a match was
++       arrived at, though it also has a  secondary  use  in  conjunction  with
+        advancing the match starting point (see (*SKIP) below).
+ 
+          (*MARK:NAME) or (*:NAME)
+ 
+-       A name is always  required  with  this  verb.  There  may  be  as  many
+-       instances  of  (*MARK) as you like in a pattern, and their names do not
++       A  name  is  always  required  with  this  verb.  There  may be as many
++       instances of (*MARK) as you like in a pattern, and their names  do  not
+        have to be unique.
+ 
+-       When a match succeeds, the name of the  last-encountered  (*MARK:NAME),
+-       (*PRUNE:NAME),  or  (*THEN:NAME) on the matching path is passed back to
+-       the caller as  described  in  the  section  entitled  "Extra  data  for
+-       pcre_exec()"  in  the  pcreapi  documentation.  Here  is  an example of
+-       pcretest output, where the /K modifier requests the retrieval and  out-
++       When  a  match succeeds, the name of the last-encountered (*MARK:NAME),
++       (*PRUNE:NAME), or (*THEN:NAME) on the matching path is passed  back  to
++       the  caller  as  described  in  the  section  entitled  "Extra data for
++       pcre_exec()" in the  pcreapi  documentation.  Here  is  an  example  of
++       pcretest  output, where the /K modifier requests the retrieval and out-
+        putting of (*MARK) data:
+ 
+            re> /X(*MARK:A)Y|X(*MARK:B)Z/K
+@@ -7355,73 +7379,73 @@ BACKTRACKING CONTROL
+          MK: B
+ 
+        The (*MARK) name is tagged with "MK:" in this output, and in this exam-
+-       ple it indicates which of the two alternatives matched. This is a  more
+-       efficient  way of obtaining this information than putting each alterna-
++       ple  it indicates which of the two alternatives matched. This is a more
++       efficient way of obtaining this information than putting each  alterna-
+        tive in its own capturing parentheses.
+ 
+-       If a verb with a name is encountered in a positive  assertion  that  is
+-       true,  the  name  is recorded and passed back if it is the last-encoun-
++       If  a  verb  with a name is encountered in a positive assertion that is
++       true, the name is recorded and passed back if it  is  the  last-encoun-
+        tered. This does not happen for negative assertions or failing positive
+        assertions.
+ 
+-       After  a  partial match or a failed match, the last encountered name in
++       After a partial match or a failed match, the last encountered  name  in
+        the entire match process is returned. For example:
+ 
+            re> /X(*MARK:A)Y|X(*MARK:B)Z/K
+          data> XP
+          No match, mark = B
+ 
+-       Note that in this unanchored example the  mark  is  retained  from  the
++       Note  that  in  this  unanchored  example the mark is retained from the
+        match attempt that started at the letter "X" in the subject. Subsequent
+        match attempts starting at "P" and then with an empty string do not get
+        as far as the (*MARK) item, but nevertheless do not reset it.
+ 
+-       If  you  are  interested  in  (*MARK)  values after failed matches, you
+-       should probably set the PCRE_NO_START_OPTIMIZE option  (see  above)  to
++       If you are interested in  (*MARK)  values  after  failed  matches,  you
++       should  probably  set  the PCRE_NO_START_OPTIMIZE option (see above) to
+        ensure that the match is always attempted.
+ 
+    Verbs that act after backtracking
+ 
+        The following verbs do nothing when they are encountered. Matching con-
+-       tinues with what follows, but if there is no subsequent match,  causing
+-       a  backtrack  to  the  verb, a failure is forced. That is, backtracking
+-       cannot pass to the left of the verb. However, when one of  these  verbs
++       tinues  with what follows, but if there is no subsequent match, causing
++       a backtrack to the verb, a failure is  forced.  That  is,  backtracking
++       cannot  pass  to the left of the verb. However, when one of these verbs
+        appears inside an atomic group or an assertion that is true, its effect
+-       is confined to that group, because once the  group  has  been  matched,
+-       there  is never any backtracking into it. In this situation, backtrack-
+-       ing can "jump back" to the left of the entire atomic  group  or  asser-
+-       tion.  (Remember  also,  as  stated  above, that this localization also
++       is  confined  to  that  group, because once the group has been matched,
++       there is never any backtracking into it. In this situation,  backtrack-
++       ing  can  "jump  back" to the left of the entire atomic group or asser-
++       tion. (Remember also, as stated  above,  that  this  localization  also
+        applies in subroutine calls.)
+ 
+-       These verbs differ in exactly what kind of failure  occurs  when  back-
+-       tracking  reaches  them.  The behaviour described below is what happens
+-       when the verb is not in a subroutine or an assertion.  Subsequent  sec-
++       These  verbs  differ  in exactly what kind of failure occurs when back-
++       tracking reaches them. The behaviour described below  is  what  happens
++       when  the  verb is not in a subroutine or an assertion. Subsequent sec-
+        tions cover these special cases.
+ 
+          (*COMMIT)
+ 
+-       This  verb, which may not be followed by a name, causes the whole match
++       This verb, which may not be followed by a name, causes the whole  match
+        to fail outright if there is a later matching failure that causes back-
+-       tracking  to  reach  it.  Even if the pattern is unanchored, no further
++       tracking to reach it. Even if the pattern  is  unanchored,  no  further
+        attempts to find a match by advancing the starting point take place. If
+-       (*COMMIT)  is  the  only backtracking verb that is encountered, once it
++       (*COMMIT) is the only backtracking verb that is  encountered,  once  it
+        has been passed pcre_exec() is committed to finding a match at the cur-
+        rent starting point, or not at all. For example:
+ 
+          a+(*COMMIT)b
+ 
+-       This  matches  "xxaab" but not "aacaab". It can be thought of as a kind
++       This matches "xxaab" but not "aacaab". It can be thought of as  a  kind
+        of dynamic anchor, or "I've started, so I must finish." The name of the
+-       most  recently passed (*MARK) in the path is passed back when (*COMMIT)
++       most recently passed (*MARK) in the path is passed back when  (*COMMIT)
+        forces a match failure.
+ 
+-       If there is more than one backtracking verb in a pattern,  a  different
+-       one  that  follows  (*COMMIT) may be triggered first, so merely passing
++       If  there  is more than one backtracking verb in a pattern, a different
++       one that follows (*COMMIT) may be triggered first,  so  merely  passing
+        (*COMMIT) during a match does not always guarantee that a match must be
+        at this starting point.
+ 
+-       Note  that  (*COMMIT)  at  the start of a pattern is not the same as an
+-       anchor, unless PCRE's start-of-match optimizations are turned  off,  as
++       Note that (*COMMIT) at the start of a pattern is not  the  same  as  an
++       anchor,  unless  PCRE's start-of-match optimizations are turned off, as
+        shown in this output from pcretest:
+ 
+            re> /(*COMMIT)abc/
+@@ -7432,207 +7456,207 @@ BACKTRACKING CONTROL
+ 
+        For this pattern, PCRE knows that any match must start with "a", so the
+        optimization skips along the subject to "a" before applying the pattern
+-       to  the first set of data. The match attempt then succeeds. In the sec-
+-       ond set of data, the escape sequence \Y is interpreted by the  pcretest
+-       program.  It  causes  the  PCRE_NO_START_OPTIMIZE option to be set when
++       to the first set of data. The match attempt then succeeds. In the  sec-
++       ond  set of data, the escape sequence \Y is interpreted by the pcretest
++       program. It causes the PCRE_NO_START_OPTIMIZE option  to  be  set  when
+        pcre_exec() is called.  This disables the optimization that skips along
+        to the first character. The pattern is now applied starting at "x", and
+-       so the (*COMMIT) causes the match to  fail  without  trying  any  other
++       so  the  (*COMMIT)  causes  the  match to fail without trying any other
+        starting points.
+ 
+          (*PRUNE) or (*PRUNE:NAME)
+ 
+-       This  verb causes the match to fail at the current starting position in
++       This verb causes the match to fail at the current starting position  in
+        the subject if there is a later matching failure that causes backtrack-
+-       ing  to  reach it. If the pattern is unanchored, the normal "bumpalong"
+-       advance to the next starting character then happens.  Backtracking  can
+-       occur  as  usual to the left of (*PRUNE), before it is reached, or when
+-       matching to the right of (*PRUNE), but if there  is  no  match  to  the
+-       right,  backtracking cannot cross (*PRUNE). In simple cases, the use of
+-       (*PRUNE) is just an alternative to an atomic group or possessive  quan-
++       ing to reach it. If the pattern is unanchored, the  normal  "bumpalong"
++       advance  to  the next starting character then happens. Backtracking can
++       occur as usual to the left of (*PRUNE), before it is reached,  or  when
++       matching  to  the  right  of  (*PRUNE), but if there is no match to the
++       right, backtracking cannot cross (*PRUNE). In simple cases, the use  of
++       (*PRUNE)  is just an alternative to an atomic group or possessive quan-
+        tifier, but there are some uses of (*PRUNE) that cannot be expressed in
+-       any other way. In an anchored pattern (*PRUNE) has the same  effect  as
++       any  other  way. In an anchored pattern (*PRUNE) has the same effect as
+        (*COMMIT).
+ 
+        The   behaviour   of   (*PRUNE:NAME)   is   the   not   the   same   as
+-       (*MARK:NAME)(*PRUNE).  It is like (*MARK:NAME)  in  that  the  name  is
+-       remembered  for  passing  back  to  the  caller.  However, (*SKIP:NAME)
++       (*MARK:NAME)(*PRUNE).   It  is  like  (*MARK:NAME)  in that the name is
++       remembered for  passing  back  to  the  caller.  However,  (*SKIP:NAME)
+        searches only for names set with (*MARK).
+ 
+          (*SKIP)
+ 
+-       This verb, when given without a name, is like (*PRUNE), except that  if
+-       the  pattern  is unanchored, the "bumpalong" advance is not to the next
++       This  verb, when given without a name, is like (*PRUNE), except that if
++       the pattern is unanchored, the "bumpalong" advance is not to  the  next
+        character, but to the position in the subject where (*SKIP) was encoun-
+-       tered.  (*SKIP)  signifies that whatever text was matched leading up to
++       tered. (*SKIP) signifies that whatever text was matched leading  up  to
+        it cannot be part of a successful match. Consider:
+ 
+          a+(*SKIP)b
+ 
+-       If the subject is "aaaac...",  after  the  first  match  attempt  fails
+-       (starting  at  the  first  character in the string), the starting point
++       If  the  subject  is  "aaaac...",  after  the first match attempt fails
++       (starting at the first character in the  string),  the  starting  point
+        skips on to start the next attempt at "c". Note that a possessive quan-
+-       tifer  does not have the same effect as this example; although it would
+-       suppress backtracking  during  the  first  match  attempt,  the  second
+-       attempt  would  start at the second character instead of skipping on to
++       tifier does not have the same effect as this example; although it would
++       suppress  backtracking  during  the  first  match  attempt,  the second
++       attempt would start at the second character instead of skipping  on  to
+        "c".
+ 
+          (*SKIP:NAME)
+ 
+        When (*SKIP) has an associated name, its behaviour is modified. When it
+        is triggered, the previous path through the pattern is searched for the
+-       most recent (*MARK) that has the  same  name.  If  one  is  found,  the
++       most  recent  (*MARK)  that  has  the  same  name. If one is found, the
+        "bumpalong" advance is to the subject position that corresponds to that
+        (*MARK) instead of to where (*SKIP) was encountered. If no (*MARK) with
+        a matching name is found, the (*SKIP) is ignored.
+ 
+-       Note  that (*SKIP:NAME) searches only for names set by (*MARK:NAME). It
++       Note that (*SKIP:NAME) searches only for names set by (*MARK:NAME).  It
+        ignores names that are set by (*PRUNE:NAME) or (*THEN:NAME).
+ 
+          (*THEN) or (*THEN:NAME)
+ 
+-       This verb causes a skip to the next innermost  alternative  when  back-
+-       tracking  reaches  it.  That  is,  it  cancels any further backtracking
+-       within the current alternative. Its name  comes  from  the  observation
++       This  verb  causes  a skip to the next innermost alternative when back-
++       tracking reaches it. That  is,  it  cancels  any  further  backtracking
++       within  the  current  alternative.  Its name comes from the observation
+        that it can be used for a pattern-based if-then-else block:
+ 
+          ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
+ 
+-       If  the COND1 pattern matches, FOO is tried (and possibly further items
+-       after the end of the group if FOO succeeds); on  failure,  the  matcher
+-       skips  to  the second alternative and tries COND2, without backtracking
+-       into COND1. If that succeeds and BAR fails, COND3 is tried.  If  subse-
+-       quently  BAZ fails, there are no more alternatives, so there is a back-
+-       track to whatever came before the  entire  group.  If  (*THEN)  is  not
++       If the COND1 pattern matches, FOO is tried (and possibly further  items
++       after  the  end  of the group if FOO succeeds); on failure, the matcher
++       skips to the second alternative and tries COND2,  without  backtracking
++       into  COND1.  If that succeeds and BAR fails, COND3 is tried. If subse-
++       quently BAZ fails, there are no more alternatives, so there is a  back-
++       track  to  whatever  came  before  the  entire group. If (*THEN) is not
+        inside an alternation, it acts like (*PRUNE).
+ 
+-       The    behaviour   of   (*THEN:NAME)   is   the   not   the   same   as
+-       (*MARK:NAME)(*THEN).  It is like  (*MARK:NAME)  in  that  the  name  is
+-       remembered  for  passing  back  to  the  caller.  However, (*SKIP:NAME)
++       The    behaviour    of    (*THEN:NAME)    is    not    the    same   as
++       (*MARK:NAME)(*THEN).   It  is  like  (*MARK:NAME)  in  that the name is
++       remembered for  passing  back  to  the  caller.  However,  (*SKIP:NAME)
+        searches only for names set with (*MARK).
+ 
+-       A subpattern that does not contain a | character is just a part of  the
+-       enclosing  alternative;  it  is  not a nested alternation with only one
+-       alternative. The effect of (*THEN) extends beyond such a subpattern  to
+-       the  enclosing alternative. Consider this pattern, where A, B, etc. are
+-       complex pattern fragments that do not contain any | characters at  this
++       A  subpattern that does not contain a | character is just a part of the
++       enclosing alternative; it is not a nested  alternation  with  only  one
++       alternative.  The effect of (*THEN) extends beyond such a subpattern to
++       the enclosing alternative. Consider this pattern, where A, B, etc.  are
++       complex  pattern fragments that do not contain any | characters at this
+        level:
+ 
+          A (B(*THEN)C) | D
+ 
+-       If  A and B are matched, but there is a failure in C, matching does not
++       If A and B are matched, but there is a failure in C, matching does  not
+        backtrack into A; instead it moves to the next alternative, that is, D.
+-       However,  if the subpattern containing (*THEN) is given an alternative,
++       However, if the subpattern containing (*THEN) is given an  alternative,
+        it behaves differently:
+ 
+          A (B(*THEN)C | (*FAIL)) | D
+ 
+-       The effect of (*THEN) is now confined to the inner subpattern. After  a
++       The  effect of (*THEN) is now confined to the inner subpattern. After a
+        failure in C, matching moves to (*FAIL), which causes the whole subpat-
+-       tern to fail because there are no more alternatives  to  try.  In  this
++       tern  to  fail  because  there are no more alternatives to try. In this
+        case, matching does now backtrack into A.
+ 
+-       Note  that  a  conditional  subpattern  is not considered as having two
+-       alternatives, because only one is ever used.  In  other  words,  the  |
++       Note that a conditional subpattern is  not  considered  as  having  two
++       alternatives,  because  only  one  is  ever used. In other words, the |
+        character in a conditional subpattern has a different meaning. Ignoring
+        white space, consider:
+ 
+          ^.*? (?(?=a) a | b(*THEN)c )
+ 
+-       If the subject is "ba", this pattern does not  match.  Because  .*?  is
+-       ungreedy,  it  initially  matches  zero characters. The condition (?=a)
+-       then fails, the character "b" is matched,  but  "c"  is  not.  At  this
+-       point,  matching does not backtrack to .*? as might perhaps be expected
+-       from the presence of the | character.  The  conditional  subpattern  is
++       If  the  subject  is  "ba", this pattern does not match. Because .*? is
++       ungreedy, it initially matches zero  characters.  The  condition  (?=a)
++       then  fails,  the  character  "b"  is  matched, but "c" is not. At this
++       point, matching does not backtrack to .*? as might perhaps be  expected
++       from  the  presence  of  the | character. The conditional subpattern is
+        part of the single alternative that comprises the whole pattern, and so
+-       the match fails. (If there was a backtrack into  .*?,  allowing  it  to
++       the  match  fails.  (If  there was a backtrack into .*?, allowing it to
+        match "b", the match would succeed.)
+ 
+-       The  verbs just described provide four different "strengths" of control
++       The verbs just described provide four different "strengths" of  control
+        when subsequent matching fails. (*THEN) is the weakest, carrying on the
+-       match  at  the next alternative. (*PRUNE) comes next, failing the match
+-       at the current starting position, but allowing an advance to  the  next
+-       character  (for an unanchored pattern). (*SKIP) is similar, except that
++       match at the next alternative. (*PRUNE) comes next, failing  the  match
++       at  the  current starting position, but allowing an advance to the next
++       character (for an unanchored pattern). (*SKIP) is similar, except  that
+        the advance may be more than one character. (*COMMIT) is the strongest,
+        causing the entire match to fail.
+ 
+    More than one backtracking verb
+ 
+-       If  more  than  one  backtracking verb is present in a pattern, the one
+-       that is backtracked onto first acts. For example,  consider  this  pat-
++       If more than one backtracking verb is present in  a  pattern,  the  one
++       that  is  backtracked  onto first acts. For example, consider this pat-
+        tern, where A, B, etc. are complex pattern fragments:
+ 
+          (A(*COMMIT)B(*THEN)C|ABD)
+ 
+-       If  A matches but B fails, the backtrack to (*COMMIT) causes the entire
++       If A matches but B fails, the backtrack to (*COMMIT) causes the  entire
+        match to fail. However, if A and B match, but C fails, the backtrack to
+-       (*THEN)  causes  the next alternative (ABD) to be tried. This behaviour
+-       is consistent, but is not always the same as Perl's. It means  that  if
+-       two  or  more backtracking verbs appear in succession, all the the last
++       (*THEN) causes the next alternative (ABD) to be tried.  This  behaviour
++       is  consistent,  but is not always the same as Perl's. It means that if
++       two or more backtracking verbs appear in succession, all but  the  last
+        of them has no effect. Consider this example:
+ 
+          ...(*COMMIT)(*PRUNE)...
+ 
+        If there is a matching failure to the right, backtracking onto (*PRUNE)
+-       causes  it to be triggered, and its action is taken. There can never be
++       causes it to be triggered, and its action is taken. There can never  be
+        a backtrack onto (*COMMIT).
+ 
+    Backtracking verbs in repeated groups
+ 
+-       PCRE differs from  Perl  in  its  handling  of  backtracking  verbs  in
++       PCRE  differs  from  Perl  in  its  handling  of  backtracking verbs in
+        repeated groups. For example, consider:
+ 
+          /(a(*COMMIT)b)+ac/
+ 
+-       If  the  subject  is  "abac",  Perl matches, but PCRE fails because the
++       If the subject is "abac", Perl matches,  but  PCRE  fails  because  the
+        (*COMMIT) in the second repeat of the group acts.
+ 
+    Backtracking verbs in assertions
+ 
+-       (*FAIL) in an assertion has its normal effect: it forces  an  immediate
++       (*FAIL)  in  an assertion has its normal effect: it forces an immediate
+        backtrack.
+ 
+        (*ACCEPT) in a positive assertion causes the assertion to succeed with-
+-       out any further processing. In a negative assertion,  (*ACCEPT)  causes
++       out  any  further processing. In a negative assertion, (*ACCEPT) causes
+        the assertion to fail without any further processing.
+ 
+-       The  other  backtracking verbs are not treated specially if they appear
+-       in a positive assertion. In  particular,  (*THEN)  skips  to  the  next
+-       alternative  in  the  innermost  enclosing group that has alternations,
++       The other backtracking verbs are not treated specially if  they  appear
++       in  a  positive  assertion.  In  particular,  (*THEN) skips to the next
++       alternative in the innermost enclosing  group  that  has  alternations,
+        whether or not this is within the assertion.
+ 
+-       Negative assertions are, however, different, in order  to  ensure  that
+-       changing  a  positive  assertion  into a negative assertion changes its
++       Negative  assertions  are,  however, different, in order to ensure that
++       changing a positive assertion into a  negative  assertion  changes  its
+        result. Backtracking into (*COMMIT), (*SKIP), or (*PRUNE) causes a neg-
+        ative assertion to be true, without considering any further alternative
+        branches in the assertion.  Backtracking into (*THEN) causes it to skip
+-       to  the next enclosing alternative within the assertion (the normal be-
+-       haviour), but if the assertion  does  not  have  such  an  alternative,
++       to the next enclosing alternative within the assertion (the normal  be-
++       haviour),  but  if  the  assertion  does  not have such an alternative,
+        (*THEN) behaves like (*PRUNE).
+ 
+    Backtracking verbs in subroutines
+ 
+-       These  behaviours  occur whether or not the subpattern is called recur-
++       These behaviours occur whether or not the subpattern is  called  recur-
+        sively.  Perl's treatment of subroutines is different in some cases.
+ 
+-       (*FAIL) in a subpattern called as a subroutine has its  normal  effect:
++       (*FAIL)  in  a subpattern called as a subroutine has its normal effect:
+        it forces an immediate backtrack.
+ 
+-       (*ACCEPT)  in a subpattern called as a subroutine causes the subroutine
+-       match to succeed without any further processing. Matching then  contin-
++       (*ACCEPT) in a subpattern called as a subroutine causes the  subroutine
++       match  to succeed without any further processing. Matching then contin-
+        ues after the subroutine call.
+ 
+        (*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine
+        cause the subroutine match to fail.
+ 
+-       (*THEN) skips to the next alternative in the innermost enclosing  group
+-       within  the subpattern that has alternatives. If there is no such group
++       (*THEN)  skips to the next alternative in the innermost enclosing group
++       within the subpattern that has alternatives. If there is no such  group
+        within the subpattern, (*THEN) causes the subroutine match to fail.
+ 
+ 
+ SEE ALSO
+ 
+-       pcreapi(3), pcrecallout(3),  pcrematching(3),  pcresyntax(3),  pcre(3),
++       pcreapi(3),  pcrecallout(3),  pcrematching(3),  pcresyntax(3), pcre(3),
+        pcre16(3), pcre32(3).
+ 
+ 
+@@ -7645,8 +7669,8 @@ AUTHOR
+ 
+ REVISION
+ 
+-       Last updated: 08 January 2014
+-       Copyright (c) 1997-2014 University of Cambridge.
++       Last updated: 14 June 2015
++       Copyright (c) 1997-2015 University of Cambridge.
+ ------------------------------------------------------------------------------
+ 
+ 
+diff --git a/ext/pcre/pcrelib/pcre.h b/ext/pcre/pcrelib/pcre.h
+index 58ed46a..bf6351f 100644
+--- a/ext/pcre/pcrelib/pcre.h
++++ b/ext/pcre/pcrelib/pcre.h
+@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
+ /* The current PCRE version information. */
+ 
+ #define PCRE_MAJOR          8
+-#define PCRE_MINOR          37
++#define PCRE_MINOR          38
+ #define PCRE_PRERELEASE     
+-#define PCRE_DATE           2015-04-28
++#define PCRE_DATE           2015-11-23
+ 
+ /* When an application links to a PCRE DLL in Windows, the symbols that are
+ imported have to be identified as such. When building PCRE, the appropriate
+diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
+index 0efad26..4d3b313 100644
+--- a/ext/pcre/pcrelib/pcre_compile.c
++++ b/ext/pcre/pcrelib/pcre_compile.c
+@@ -174,7 +174,7 @@ static const short int escapes[] = {
+      -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
+      CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
+      CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
+-     CHAR_GRAVE_ACCENT,       7,
++     CHAR_GRAVE_ACCENT,       ESC_a,
+      -ESC_b,                  0,
+      -ESC_d,                  ESC_e,
+      ESC_f,                   0,
+@@ -202,9 +202,9 @@ static const short int escapes[] = {
+ /*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',
+ /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
+ /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
+-/*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
++/*  80 */     0, ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
+ /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
+-/*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,
++/*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
+ /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
+ /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
+ /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
+@@ -219,6 +219,12 @@ static const short int escapes[] = {
+ /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
+ /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
+ };
++
++/* We also need a table of characters that may follow \c in an EBCDIC
++environment for characters 0-31. */
++
++static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
++
+ #endif
+ 
+ 
+@@ -458,7 +464,7 @@ static const char error_texts[] =
+   "range out of order in character class\0"
+   "nothing to repeat\0"
+   /* 10 */
+-  "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
++  "internal error: invalid forward reference offset\0"
+   "internal error: unexpected repeat\0"
+   "unrecognized character after (? or (?-\0"
+   "POSIX named classes are supported only within a class\0"
+@@ -527,7 +533,11 @@ static const char error_texts[] =
+   "different names for subpatterns of the same number are not allowed\0"
+   "(*MARK) must have an argument\0"
+   "this version of PCRE is not compiled with Unicode property support\0"
++#ifndef EBCDIC
+   "\\c must be followed by an ASCII character\0"
++#else
++  "\\c must be followed by a letter or one of [\\]^_?\0"
++#endif
+   "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
+   /* 70 */
+   "internal error: unknown opcode in find_fixedlength()\0"
+@@ -1425,7 +1435,16 @@ else
+     c ^= 0x40;
+ #else             /* EBCDIC coding */
+     if (c >= CHAR_a && c <= CHAR_z) c += 64;
+-    c ^= 0xC0;
++    if (c == CHAR_QUESTION_MARK)
++      c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff;
++    else
++      {
++      for (i = 0; i < 32; i++)
++        {
++        if (c == ebcdic_escape_c[i]) break;
++        }
++      if (i < 32) c = i; else *errorcodeptr = ERR68;
++      }
+ #endif
+     break;
+ 
+@@ -1799,7 +1818,7 @@ for (;;)
+     case OP_ASSERTBACK:
+     case OP_ASSERTBACK_NOT:
+     do cc += GET(cc, 1); while (*cc == OP_ALT);
+-    cc += PRIV(OP_lengths)[*cc];
++    cc += 1 + LINK_SIZE;
+     break;
+ 
+     /* Skip over things that don't match chars */
+@@ -2487,7 +2506,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
+   if (c == OP_BRA  || c == OP_BRAPOS ||
+       c == OP_CBRA || c == OP_CBRAPOS ||
+       c == OP_ONCE || c == OP_ONCE_NC ||
+-      c == OP_COND)
++      c == OP_COND || c == OP_SCOND)
+     {
+     BOOL empty_branch;
+     if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
+@@ -3886,11 +3905,11 @@ didn't consider this to be a POSIX class. Likewise for [:1234:].
+ The problem in trying to be exactly like Perl is in the handling of escapes. We
+ have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
+ class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
+-below handles the special case of \], but does not try to do any other escape
+-processing. This makes it different from Perl for cases such as [:l\ower:]
+-where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
+-"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
+-I think.
++below handles the special cases \\ and \], but does not try to do any other
++escape processing. This makes it different from Perl for cases such as
++[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
++not recognize "l\ower". This is a lesser evil than not diagnosing bad classes
++when Perl does, I think.
+ 
+ A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
+ It seems that the appearance of a nested POSIX class supersedes an apparent
+@@ -3917,21 +3936,16 @@ pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
+ terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
+ for (++ptr; *ptr != CHAR_NULL; ptr++)
+   {
+-  if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
++  if (*ptr == CHAR_BACKSLASH &&
++      (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET ||
++       ptr[1] == CHAR_BACKSLASH))
+     ptr++;
+-  else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
+-  else
++  else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
++            *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
++  else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+     {
+-    if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+-      {
+-      *endptr = ptr;
+-      return TRUE;
+-      }
+-    if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
+-         (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+-          ptr[1] == CHAR_EQUALS_SIGN) &&
+-        check_posix_syntax(ptr, endptr))
+-      return FALSE;
++    *endptr = ptr;
++    return TRUE;
+     }
+   }
+ return FALSE;
+@@ -3985,11 +3999,12 @@ have their offsets adjusted. That one of the jobs of this function. Before it
+ is called, the partially compiled regex must be temporarily terminated with
+ OP_END.
+ 
+-This function has been extended with the possibility of forward references for
+-recursions and subroutine calls. It must also check the list of such references
+-for the group we are dealing with. If it finds that one of the recursions in
+-the current group is on this list, it adjusts the offset in the list, not the
+-value in the reference (which is a group number).
++This function has been extended to cope with forward references for recursions
++and subroutine calls. It must check the list of such references for the
++group we are dealing with. If it finds that one of the recursions in the
++current group is on this list, it does not adjust the value in the reference
++(which is a group number). After the group has been scanned, all the offsets in
++the forward reference list for the group are adjusted.
+ 
+ Arguments:
+   group      points to the start of the group
+@@ -4005,29 +4020,21 @@ static void
+ adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
+   size_t save_hwm_offset)
+ {
++int offset;
++pcre_uchar *hc;
+ pcre_uchar *ptr = group;
+ 
+ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
+   {
+-  int offset;
+-  pcre_uchar *hc;
+-
+-  /* See if this recursion is on the forward reference list. If so, adjust the
+-  reference. */
+-
+   for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
+        hc += LINK_SIZE)
+     {
+     offset = (int)GET(hc, 0);
+-    if (cd->start_code + offset == ptr + 1)
+-      {
+-      PUT(hc, 0, offset + adjust);
+-      break;
+-      }
++    if (cd->start_code + offset == ptr + 1) break;
+     }
+ 
+-  /* Otherwise, adjust the recursion offset if it's after the start of this
+-  group. */
++  /* If we have not found this recursion on the forward reference list, adjust
++  the recursion's offset if it's after the start of this group. */
+ 
+   if (hc >= cd->hwm)
+     {
+@@ -4037,6 +4044,15 @@ while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
+ 
+   ptr += 1 + LINK_SIZE;
+   }
++
++/* Now adjust all forward reference offsets for the group. */
++
++for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
++     hc += LINK_SIZE)
++  {
++  offset = (int)GET(hc, 0);
++  PUT(hc, 0, offset + adjust);
++  }
+ }
+ 
+ 
+@@ -4465,7 +4481,7 @@ const pcre_uchar *tempptr;
+ const pcre_uchar *nestptr = NULL;
+ pcre_uchar *previous = NULL;
+ pcre_uchar *previous_callout = NULL;
+-size_t save_hwm_offset = 0;
++size_t item_hwm_offset = 0;
+ pcre_uint8 classbits[32];
+ 
+ /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
+@@ -4623,8 +4639,7 @@ for (;; ptr++)
+   /* In the real compile phase, just check the workspace used by the forward
+   reference list. */
+ 
+-  else if (cd->hwm > cd->start_workspace + cd->workspace_size -
+-           WORK_SIZE_SAFETY_MARGIN)
++  else if (cd->hwm > cd->start_workspace + cd->workspace_size)
+     {
+     *errorcodeptr = ERR52;
+     goto FAILED;
+@@ -4767,6 +4782,7 @@ for (;; ptr++)
+     zeroreqchar = reqchar;
+     zeroreqcharflags = reqcharflags;
+     previous = code;
++    item_hwm_offset = cd->hwm - cd->start_workspace;
+     *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
+     break;
+ 
+@@ -4818,6 +4834,7 @@ for (;; ptr++)
+     /* Handle a real character class. */
+ 
+     previous = code;
++    item_hwm_offset = cd->hwm - cd->start_workspace;
+ 
+     /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
+     they are encountered at the top level, so we'll do that too. */
+@@ -4923,9 +4940,10 @@ for (;; ptr++)
+       (which is on the stack). We have to remember that there was XCLASS data,
+       however. */
+ 
++      if (class_uchardata > class_uchardata_base) xclass = TRUE;
++
+       if (lengthptr != NULL && class_uchardata > class_uchardata_base)
+         {
+-        xclass = TRUE;
+         *lengthptr += (int)(class_uchardata - class_uchardata_base);
+         class_uchardata = class_uchardata_base;
+         }
+@@ -5028,10 +5046,26 @@ for (;; ptr++)
+             ptr = tempptr + 1;
+             continue;
+ 
+-            /* For all other POSIX classes, no special action is taken in UCP
+-            mode. Fall through to the non_UCP case. */
++            /* For the other POSIX classes (ascii, xdigit) we are going to fall
++            through to the non-UCP case and build a bit map for characters with
++            code points less than 256. If we are in a negated POSIX class
++            within a non-negated overall class, characters with code points
++            greater than 255 must all match. In the special case where we have
++            not yet generated any xclass data, and this is the final item in
++            the overall class, we need do nothing: later on, the opcode
++            OP_NCLASS will be used to indicate that characters greater than 255
++            are acceptable. If we have already seen an xclass item or one may
++            follow (we have to assume that it might if this is not the end of
++            the class), explicitly match all wide codepoints. */
+ 
+             default:
++            if (!negate_class && local_negate &&
++                (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
++              {
++              *class_uchardata++ = XCL_RANGE;
++              class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
++              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
++              }
+             break;
+             }
+           }
+@@ -5195,9 +5229,9 @@ for (;; ptr++)
+               cd, PRIV(vspace_list));
+             continue;
+ 
+-#ifdef SUPPORT_UCP
+             case ESC_p:
+             case ESC_P:
++#ifdef SUPPORT_UCP
+               {
+               BOOL negated;
+               unsigned int ptype = 0, pdata = 0;
+@@ -5211,6 +5245,9 @@ for (;; ptr++)
+               class_has_8bitchar--;                /* Undo! */
+               continue;
+               }
++#else
++            *errorcodeptr = ERR45;
++            goto FAILED;
+ #endif
+             /* Unrecognized escapes are faulted if PCRE is running in its
+             strict mode. By default, for compatibility with Perl, they are
+@@ -5367,16 +5404,20 @@ for (;; ptr++)
+       CLASS_SINGLE_CHARACTER:
+       if (class_one_char < 2) class_one_char++;
+ 
+-      /* If class_one_char is 1, we have the first single character in the
+-      class, and there have been no prior ranges, or XCLASS items generated by
+-      escapes. If this is the final character in the class, we can optimize by
+-      turning the item into a 1-character OP_CHAR[I] if it's positive, or
+-      OP_NOT[I] if it's negative. In the positive case, it can cause firstchar
+-      to be set. Otherwise, there can be no first char if this item is first,
+-      whatever repeat count may follow. In the case of reqchar, save the
+-      previous value for reinstating. */
++      /* If xclass_has_prop is false and class_one_char is 1, we have the first
++      single character in the class, and there have been no prior ranges, or
++      XCLASS items generated by escapes. If this is the final character in the
++      class, we can optimize by turning the item into a 1-character OP_CHAR[I]
++      if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
++      can cause firstchar to be set. Otherwise, there can be no first char if
++      this item is first, whatever repeat count may follow. In the case of
++      reqchar, save the previous value for reinstating. */
+ 
+-      if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
++      if (!inescq &&
++#ifdef SUPPORT_UCP
++          !xclass_has_prop &&
++#endif
++          class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+         {
+         ptr++;
+         zeroreqchar = reqchar;
+@@ -5492,9 +5533,10 @@ for (;; ptr++)
+     actual compiled code. */
+ 
+ #ifdef SUPPORT_UTF
+-    if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
++    if (xclass && (xclass_has_prop || !should_flip_negation ||
++        (options & PCRE_UCP) != 0))
+ #elif !defined COMPILE_PCRE8
+-    if (xclass && !should_flip_negation)
++    if (xclass && (xclass_has_prop || !should_flip_negation))
+ #endif
+ #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+       {
+@@ -5930,7 +5972,7 @@ for (;; ptr++)
+       {
+       register int i;
+       int len = (int)(code - previous);
+-      size_t base_hwm_offset = save_hwm_offset;
++      size_t base_hwm_offset = item_hwm_offset;
+       pcre_uchar *bralink = NULL;
+       pcre_uchar *brazeroptr = NULL;
+ 
+@@ -5985,7 +6027,7 @@ for (;; ptr++)
+         if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
+           {
+           *code = OP_END;
+-          adjust_recurse(previous, 1, utf, cd, save_hwm_offset);
++          adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
+           memmove(previous + 1, previous, IN_UCHARS(len));
+           code++;
+           if (repeat_max == 0)
+@@ -6009,7 +6051,7 @@ for (;; ptr++)
+           {
+           int offset;
+           *code = OP_END;
+-          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);
++          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
+           memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
+           code += 2 + LINK_SIZE;
+           *previous++ = OP_BRAZERO + repeat_type;
+@@ -6254,6 +6296,12 @@ for (;; ptr++)
+             while (*scode == OP_ALT);
+             }
+ 
++          /* A conditional group with only one branch has an implicit empty
++          alternative branch. */
++
++          if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
++            *bracode = OP_SCOND;
++
+           /* Handle possessive quantifiers. */
+ 
+           if (possessive_quantifier)
+@@ -6267,11 +6315,11 @@ for (;; ptr++)
+               {
+               int nlen = (int)(code - bracode);
+               *code = OP_END;
+-              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
++              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
+               memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
+               code += 1 + LINK_SIZE;
+               nlen += 1 + LINK_SIZE;
+-              *bracode = OP_BRAPOS;
++              *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
+               *code++ = OP_KETRPOS;
+               PUTINC(code, 0, nlen);
+               PUT(bracode, 1, nlen);
+@@ -6401,7 +6449,7 @@ for (;; ptr++)
+         else
+           {
+           *code = OP_END;
+-          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
++          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
+           memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
+           code += 1 + LINK_SIZE;
+           len += 1 + LINK_SIZE;
+@@ -6450,7 +6498,7 @@ for (;; ptr++)
+ 
+         default:
+         *code = OP_END;
+-        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);
++        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
+         memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
+         code += 1 + LINK_SIZE;
+         len += 1 + LINK_SIZE;
+@@ -6586,9 +6634,17 @@ for (;; ptr++)
+               goto FAILED;
+               }
+             setverb = *code++ = verbs[i].op_arg;
+-            *code++ = arglen;
+-            memcpy(code, arg, IN_UCHARS(arglen));
+-            code += arglen;
++            if (lengthptr != NULL)    /* In pass 1 just add in the length */
++              {                       /* to avoid potential workspace */
++              *lengthptr += arglen;   /* overflow. */
++              *code++ = 0;
++              }
++            else
++              {
++              *code++ = arglen;
++              memcpy(code, arg, IN_UCHARS(arglen));
++              code += arglen;
++              }
+             *code++ = 0;
+             }
+ 
+@@ -6623,7 +6679,7 @@ for (;; ptr++)
+     newoptions = options;
+     skipbytes = 0;
+     bravalue = OP_CBRA;
+-    save_hwm_offset = cd->hwm - cd->start_workspace;
++    item_hwm_offset = cd->hwm - cd->start_workspace;
+     reset_bracount = FALSE;
+ 
+     /* Deal with the extended parentheses; all are introduced by '?', and the
+@@ -6641,6 +6697,7 @@ for (;; ptr++)
+         /* ------------------------------------------------------------ */
+         case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
+         reset_bracount = TRUE;
++        cd->dupgroups = TRUE;     /* Record (?| encountered */
+         /* Fall through */
+ 
+         /* ------------------------------------------------------------ */
+@@ -6741,6 +6798,12 @@ for (;; ptr++)
+           {
+           while (IS_DIGIT(*ptr))
+             {
++            if (recno > INT_MAX / 10 - 1)  /* Integer overflow */
++              {
++              while (IS_DIGIT(*ptr)) ptr++;
++              *errorcodeptr = ERR61;
++              goto FAILED;
++              }
+             recno = recno * 10 + (int)(*ptr - CHAR_0);
+             ptr++;
+             }
+@@ -6769,7 +6832,7 @@ for (;; ptr++)
+             ptr++;
+             }
+           namelen = (int)(ptr - name);
+-          if (lengthptr != NULL) *lengthptr += IMM2_SIZE;
++          if (lengthptr != NULL) skipbytes += IMM2_SIZE;
+           }
+ 
+         /* Check the terminator */
+@@ -6875,6 +6938,11 @@ for (;; ptr++)
+               *errorcodeptr = ERR15;
+               goto FAILED;
+               }
++            if (recno > INT_MAX / 10 - 1)   /* Integer overflow */
++              {
++              *errorcodeptr = ERR61;
++              goto FAILED;
++              }
+             recno = recno * 10 + name[i] - CHAR_0;
+             }
+           if (recno == 0) recno = RREF_ANY;
+@@ -7151,6 +7219,7 @@ for (;; ptr++)
+         if (lengthptr != NULL)
+           {
+           named_group *ng;
++          recno = 0;
+ 
+           if (namelen == 0)
+             {
+@@ -7168,20 +7237,6 @@ for (;; ptr++)
+             goto FAILED;
+             }
+ 
+-          /* The name table does not exist in the first pass; instead we must
+-          scan the list of names encountered so far in order to get the
+-          number. If the name is not found, set the value to 0 for a forward
+-          reference. */
+-
+-          ng = cd->named_groups;
+-          for (i = 0; i < cd->names_found; i++, ng++)
+-            {
+-            if (namelen == ng->length &&
+-                STRNCMP_UC_UC(name, ng->name, namelen) == 0)
+-              break;
+-            }
+-          recno = (i < cd->names_found)? ng->number : 0;
+-
+           /* Count named back references. */
+ 
+           if (!is_recurse) cd->namedrefcount++;
+@@ -7191,6 +7246,56 @@ for (;; ptr++)
+           16-bit data item. */
+ 
+           *lengthptr += IMM2_SIZE;
++
++          /* If this is a forward reference and we are within a (?|...) group,
++          the reference may end up as the number of a group which we are
++          currently inside, that is, it could be a recursive reference. In the
++          real compile this will be picked up and the reference wrapped with
++          OP_ONCE to make it atomic, so we must allow space in case this occurs. */
++
++          /* In fact, this can happen for a non-forward reference because
++          another group with the same number might be created later. This
++          issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
++          only mode, we finesse the bug by allowing more memory always. */
++
++          *lengthptr += 2 + 2*LINK_SIZE;
++
++          /* It is even worse than that. The current reference may be to an
++          existing named group with a different number (so apparently not
++          recursive) but which later on is also attached to a group with the
++          current number. This can only happen if (?| has been previously
++          encountered. In that case, we allow yet more memory, just in case.
++          (Again, this is fixed "properly" in PCRE2.) */
++
++          if (cd->dupgroups) *lengthptr += 4 + 4*LINK_SIZE;
++
++          /* Otherwise, check for recursion here. The name table does not exist
++          in the first pass; instead we must scan the list of names encountered
++          so far in order to get the number. If the name is not found, leave
++          the value of recno as 0 for a forward reference. */
++
++          else
++            {
++            ng = cd->named_groups;
++            for (i = 0; i < cd->names_found; i++, ng++)
++              {
++              if (namelen == ng->length &&
++                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
++                {
++                open_capitem *oc;
++                recno = ng->number;
++                if (is_recurse) break;
++                for (oc = cd->open_caps; oc != NULL; oc = oc->next)
++                  {
++                  if (oc->number == recno)
++                    {
++                    oc->flag = TRUE;
++                    break;
++                    }
++                  }
++                }
++              }
++            }
+           }
+ 
+         /* In the real compile, search the name table. We check the name
+@@ -7237,8 +7342,6 @@ for (;; ptr++)
+           for (i++; i < cd->names_found; i++)
+             {
+             if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
+-
+-
+             count++;
+             cslot += cd->name_entry_size;
+             }
+@@ -7247,6 +7350,7 @@ for (;; ptr++)
+             {
+             if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
+             previous = code;
++            item_hwm_offset = cd->hwm - cd->start_workspace;
+             *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
+             PUT2INC(code, 0, index);
+             PUT2INC(code, 0, count);
+@@ -7284,9 +7388,14 @@ for (;; ptr++)
+ 
+ 
+         /* ------------------------------------------------------------ */
+-        case CHAR_R:              /* Recursion */
+-        ptr++;                    /* Same as (?0)      */
+-        /* Fall through */
++        case CHAR_R:              /* Recursion, same as (?0) */
++        recno = 0;
++        if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
++          {
++          *errorcodeptr = ERR29;
++          goto FAILED;
++          }
++        goto HANDLE_RECURSION;
+ 
+ 
+         /* ------------------------------------------------------------ */
+@@ -7323,7 +7432,15 @@ for (;; ptr++)
+ 
+           recno = 0;
+           while(IS_DIGIT(*ptr))
++            {
++            if (recno > INT_MAX / 10 - 1) /* Integer overflow */
++              {
++              while (IS_DIGIT(*ptr)) ptr++;
++              *errorcodeptr = ERR61;
++              goto FAILED;
++              }
+             recno = recno * 10 + *ptr++ - CHAR_0;
++            }
+ 
+           if (*ptr != (pcre_uchar)terminator)
+             {
+@@ -7360,6 +7477,7 @@ for (;; ptr++)
+           HANDLE_RECURSION:
+ 
+           previous = code;
++          item_hwm_offset = cd->hwm - cd->start_workspace;
+           called = cd->start_code;
+ 
+           /* When we are actually compiling, find the bracket that is being
+@@ -7561,7 +7679,11 @@ for (;; ptr++)
+       previous = NULL;
+       cd->iscondassert = FALSE;
+       }
+-    else previous = code;
++    else
++      {
++      previous = code;
++      item_hwm_offset = cd->hwm - cd->start_workspace;
++      }
+ 
+     *code = bravalue;
+     tempcode = code;
+@@ -7809,7 +7931,7 @@ for (;; ptr++)
+         const pcre_uchar *p;
+         pcre_uint32 cf;
+ 
+-        save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
++        item_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
+         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+           CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
+ 
+@@ -7838,7 +7960,7 @@ for (;; ptr++)
+         if (*p != (pcre_uchar)terminator)
+           {
+           *errorcodeptr = ERR57;
+-          break;
++          goto FAILED;
+           }
+         ptr++;
+         goto HANDLE_NUMERICAL_RECURSION;
+@@ -7853,7 +7975,7 @@ for (;; ptr++)
+           ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
+           {
+           *errorcodeptr = ERR69;
+-          break;
++          goto FAILED;
+           }
+         is_recurse = FALSE;
+         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+@@ -7877,6 +7999,7 @@ for (;; ptr++)
+         HANDLE_REFERENCE:
+         if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
+         previous = code;
++        item_hwm_offset = cd->hwm - cd->start_workspace;
+         *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
+         PUT2INC(code, 0, recno);
+         cd->backref_map |= (recno < 32)? (1 << recno) : 1;
+@@ -7906,6 +8029,7 @@ for (;; ptr++)
+         if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
+           goto FAILED;
+         previous = code;
++        item_hwm_offset = cd->hwm - cd->start_workspace;
+         *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
+         *code++ = ptype;
+         *code++ = pdata;
+@@ -7946,6 +8070,7 @@ for (;; ptr++)
+ 
+           {
+           previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
++          item_hwm_offset = cd->hwm - cd->start_workspace;
+           *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
+           }
+         }
+@@ -7989,6 +8114,7 @@ for (;; ptr++)
+ 
+     ONE_CHAR:
+     previous = code;
++    item_hwm_offset = cd->hwm - cd->start_workspace;
+ 
+     /* For caseless UTF-8 mode when UCP support is available, check whether
+     this character has more than one other case. If so, generate a special
+@@ -9164,6 +9290,7 @@ cd->names_found = 0;
+ cd->name_entry_size = 0;
+ cd->name_table = NULL;
+ cd->dupnames = FALSE;
++cd->dupgroups = FALSE;
+ cd->namedrefcount = 0;
+ cd->start_code = cworkspace;
+ cd->hwm = cworkspace;
+@@ -9336,6 +9463,16 @@ if (cd->hwm > cd->start_workspace)
+     int offset, recno;
+     cd->hwm -= LINK_SIZE;
+     offset = GET(cd->hwm, 0);
++
++    /* Check that the hwm handling hasn't gone wrong. This whole area is
++    rewritten in PCRE2 because there are some obscure cases. */
++
++    if (offset == 0 || codestart[offset-1] != OP_RECURSE)
++      {
++      errorcode = ERR10;
++      break;
++      }
++
+     recno = GET(codestart, offset);
+     if (recno != prev_recno)
+       {
+@@ -9366,7 +9503,7 @@ used in this code because at least one compiler gives a warning about loss of
+ "const" attribute if the cast (pcre_uchar *)codestart is used directly in the
+ function call. */
+ 
+-if ((options & PCRE_NO_AUTO_POSSESS) == 0)
++if (errorcode == 0 && (options & PCRE_NO_AUTO_POSSESS) == 0)
+   {
+   pcre_uchar *temp = (pcre_uchar *)codestart;
+   auto_possessify(temp, utf, cd);
+@@ -9380,7 +9517,7 @@ OP_RECURSE that are not fixed length get a diagnosic with a useful offset. The
+ exceptional ones forgo this. We scan the pattern to check that they are fixed
+ length, and set their lengths. */
+ 
+-if (cd->check_lookbehind)
++if (errorcode == 0 && cd->check_lookbehind)
+   {
+   pcre_uchar *cc = (pcre_uchar *)codestart;
+ 
+@@ -9593,4 +9730,3 @@ return (pcre32 *)re;
+ }
+ 
+ /* End of pcre_compile.c */
+-
+diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
+index 3942076..24b23ca 100644
+--- a/ext/pcre/pcrelib/pcre_exec.c
++++ b/ext/pcre/pcrelib/pcre_exec.c
+@@ -688,7 +688,7 @@ the alternative names that are used. */
+ #define foc           number
+ #define save_mark     data
+ 
+-/* These statements are here to stop the compiler complaining about uninitialized
++/* These statements are here to stop the compiler complaining about uninitialized
+ variables. */
+ 
+ #ifdef SUPPORT_UCP
+@@ -6685,7 +6685,8 @@ if (md->offset_vector != NULL)
+   register int *iend = iptr - re->top_bracket;
+   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
+   while (--iptr >= iend) *iptr = -1;
+-  md->offset_vector[0] = md->offset_vector[1] = -1;
++  if (offsetcount > 0) md->offset_vector[0] = -1;
++  if (offsetcount > 1) md->offset_vector[1] = -1;
+   }
+ 
+ /* Set up the first character to match, if available. The first_char value is
+diff --git a/ext/pcre/pcrelib/pcre_internal.h b/ext/pcre/pcrelib/pcre_internal.h
+index 4c4817d..aec1879 100644
+--- a/ext/pcre/pcrelib/pcre_internal.h
++++ b/ext/pcre/pcrelib/pcre_internal.h
+@@ -988,7 +988,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
+ #ifndef EBCDIC
+ 
+ #define HSPACE_LIST \
+-  CHAR_HT, CHAR_SPACE, 0xa0, \
++  CHAR_HT, CHAR_SPACE, CHAR_NBSP, \
+   0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
+   0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
+   NOTACHAR
+@@ -1014,7 +1014,7 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
+ #define HSPACE_BYTE_CASES \
+   case CHAR_HT: \
+   case CHAR_SPACE: \
+-  case 0xa0     /* NBSP */
++  case CHAR_NBSP
+ 
+ #define HSPACE_CASES \
+   HSPACE_BYTE_CASES: \
+@@ -1041,11 +1041,12 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
+ /* ------ EBCDIC environments ------ */
+ 
+ #else
+-#define HSPACE_LIST CHAR_HT, CHAR_SPACE
++#define HSPACE_LIST CHAR_HT, CHAR_SPACE, CHAR_NBSP, NOTACHAR
+ 
+ #define HSPACE_BYTE_CASES \
+   case CHAR_HT: \
+-  case CHAR_SPACE
++  case CHAR_SPACE: \
++  case CHAR_NBSP
+ 
+ #define HSPACE_CASES HSPACE_BYTE_CASES
+ 
+@@ -1219,6 +1220,7 @@ same code point. */
+ 
+ #define CHAR_ESC                    '\047'
+ #define CHAR_DEL                    '\007'
++#define CHAR_NBSP                   '\x41'
+ #define STR_ESC                     "\047"
+ #define STR_DEL                     "\007"
+ 
+@@ -1233,6 +1235,7 @@ a positive value. */
+ #define CHAR_NEL                    ((unsigned char)'\x85')
+ #define CHAR_ESC                    '\033'
+ #define CHAR_DEL                    '\177'
++#define CHAR_NBSP                   ((unsigned char)'\xa0')
+ 
+ #define STR_LF                      "\n"
+ #define STR_NL                      STR_LF
+@@ -1610,6 +1613,7 @@ only. */
+ #define CHAR_VERTICAL_LINE          '\174'
+ #define CHAR_RIGHT_CURLY_BRACKET    '\175'
+ #define CHAR_TILDE                  '\176'
++#define CHAR_NBSP                   ((unsigned char)'\xa0')
+ 
+ #define STR_HT                      "\011"
+ #define STR_VT                      "\013"
+@@ -1766,6 +1770,10 @@ only. */
+ 
+ /* Escape items that are just an encoding of a particular data value. */
+ 
++#ifndef ESC_a
++#define ESC_a CHAR_BEL
++#endif
++
+ #ifndef ESC_e
+ #define ESC_e CHAR_ESC
+ #endif
+@@ -2450,6 +2458,7 @@ typedef struct compile_data {
+   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
+   BOOL check_lookbehind;            /* Lookbehinds need later checking */
+   BOOL dupnames;                    /* Duplicate names exist */
++  BOOL dupgroups;                   /* Duplicate groups exist: (?| found */
+   BOOL iscondassert;                /* Next assert is a condition */
+   int  nltype;                      /* Newline type */
+   int  nllen;                       /* Newline string length */
+diff --git a/ext/pcre/pcrelib/pcre_jit_compile.c b/ext/pcre/pcrelib/pcre_jit_compile.c
+index debdf6e..445de0c 100644
+--- a/ext/pcre/pcrelib/pcre_jit_compile.c
++++ b/ext/pcre/pcrelib/pcre_jit_compile.c
+@@ -1064,6 +1064,7 @@ pcre_uchar *alternative;
+ pcre_uchar *end = NULL;
+ int private_data_ptr = *private_data_start;
+ int space, size, bracketlen;
++BOOL repeat_check = TRUE;
+ 
+ while (cc < ccend)
+   {
+@@ -1071,9 +1072,10 @@ while (cc < ccend)
+   size = 0;
+   bracketlen = 0;
+   if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
+-    return;
++    break;
+ 
+-  if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
++  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
++    {
+     if (detect_repeat(common, cc))
+       {
+       /* These brackets are converted to repeats, so no global
+@@ -1081,6 +1083,8 @@ while (cc < ccend)
+       if (cc >= end)
+         end = bracketend(cc);
+       }
++    }
++  repeat_check = TRUE;
+ 
+   switch(*cc)
+     {
+@@ -1136,6 +1140,13 @@ while (cc < ccend)
+     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
+     break;
+ 
++    case OP_BRAZERO:
++    case OP_BRAMINZERO:
++    case OP_BRAPOSZERO:
++    repeat_check = FALSE;
++    size = 1;
++    break;
++
+     CASE_ITERATOR_PRIVATE_DATA_1
+     space = 1;
+     size = -2;
+@@ -1162,12 +1173,17 @@ while (cc < ccend)
+     size = 1;
+     break;
+ 
+-    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
++    case OP_TYPEUPTO:
+     if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
+       space = 2;
+     size = 1 + IMM2_SIZE;
+     break;
+ 
++    case OP_TYPEMINUPTO:
++    space = 2;
++    size = 1 + IMM2_SIZE;
++    break;
++
+     case OP_CLASS:
+     case OP_NCLASS:
+     size += 1 + 32 / sizeof(pcre_uchar);
+@@ -1316,6 +1332,13 @@ while (cc < ccend)
+     cc += 1 + LINK_SIZE + IMM2_SIZE;
+     break;
+ 
++    case OP_THEN:
++    stack_restore = TRUE;
++    if (common->control_head_ptr != 0)
++      *needs_control_head = TRUE;
++    cc ++;
++    break;
++
+     default:
+     stack_restore = TRUE;
+     /* Fall through. */
+@@ -2220,6 +2243,7 @@ while (current != NULL)
+     SLJIT_ASSERT_STOP();
+     break;
+     }
++  SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
+   current = (sljit_sw*)current[-1];
+   }
+ return -1;
+@@ -3209,7 +3233,7 @@ bytes[len] = byte;
+ bytes[0] = len;
+ }
+ 
+-static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars)
++static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
+ {
+ /* Recursive function, which scans prefix literals. */
+ BOOL last, any, caseless;
+@@ -3227,9 +3251,14 @@ pcre_uchar othercase[1];
+ repeat = 1;
+ while (TRUE)
+   {
++  if (*rec_count == 0)
++    return 0;
++  (*rec_count)--;
++
+   last = TRUE;
+   any = FALSE;
+   caseless = FALSE;
++
+   switch (*cc)
+     {
+     case OP_CHARI:
+@@ -3291,7 +3320,7 @@ while (TRUE)
+ #ifdef SUPPORT_UTF
+     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+ #endif
+-    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars);
++    max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
+     if (max_chars == 0)
+       return consumed;
+     last = FALSE;
+@@ -3314,7 +3343,7 @@ while (TRUE)
+     alternative = cc + GET(cc, 1);
+     while (*alternative == OP_ALT)
+       {
+-      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars);
++      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
+       if (max_chars == 0)
+         return consumed;
+       alternative += GET(alternative, 1);
+@@ -3556,6 +3585,7 @@ int i, max, from;
+ int range_right = -1, range_len = 3 - 1;
+ sljit_ub *update_table = NULL;
+ BOOL in_range;
++pcre_uint32 rec_count;
+ 
+ for (i = 0; i < MAX_N_CHARS; i++)
+   {
+@@ -3564,7 +3594,8 @@ for (i = 0; i < MAX_N_CHARS; i++)
+   bytes[i * MAX_N_BYTES] = 0;
+   }
+ 
+-max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);
++rec_count = 10000;
++max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
+ 
+ if (max <= 1)
+   return FALSE;
+@@ -4311,8 +4342,10 @@ switch(length)
+   case 4:
+   if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
+       && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
++      && (ranges[1] & (ranges[2] - ranges[0])) == 0
+       && is_powerof2(ranges[2] - ranges[0]))
+     {
++    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
+     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
+     if (ranges[2] + 1 != ranges[3])
+       {
+@@ -4900,9 +4933,10 @@ else if ((cc[-1] & XCL_MAP) != 0)
+   if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
+     {
+ #ifdef COMPILE_PCRE8
+-    SLJIT_ASSERT(common->utf);
++    jump = NULL;
++    if (common->utf)
+ #endif
+-    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
++      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+ 
+     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+@@ -4911,7 +4945,10 @@ else if ((cc[-1] & XCL_MAP) != 0)
+     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+     add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
+ 
+-    JUMPHERE(jump);
++#ifdef COMPILE_PCRE8
++    if (common->utf)
++#endif
++      JUMPHERE(jump);
+     }
+ 
+   OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
+@@ -5219,7 +5256,7 @@ while (*cc != XCL_END)
+       OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ 
+       SET_CHAR_OFFSET(0);
+-      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
++      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
+       OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ 
+       SET_TYPE_OFFSET(ucp_Pc);
+@@ -7665,6 +7702,10 @@ while (*cc != OP_KETRPOS)
+       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
+       }
+ 
++    /* Even if the match is empty, we need to reset the control head. */
++    if (needs_control_head)
++      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
++
+     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
+       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
+ 
+@@ -7692,6 +7733,10 @@ while (*cc != OP_KETRPOS)
+       OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
+       }
+ 
++    /* Even if the match is empty, we need to reset the control head. */
++    if (needs_control_head)
++      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
++
+     if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
+       add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
+ 
+@@ -7704,9 +7749,6 @@ while (*cc != OP_KETRPOS)
+       }
+     }
+ 
+-  if (needs_control_head)
+-    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
+-
+   JUMPTO(SLJIT_JUMP, loop);
+   flush_stubs(common);
+ 
+@@ -8441,8 +8483,7 @@ while (cc < ccend)
+       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
+       }
+     BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
+-    if (cc[1] > OP_ASSERTBACK_NOT)
+-      count_match(common);
++    count_match(common);
+     break;
+ 
+     case OP_ONCE:
+@@ -9624,7 +9665,7 @@ static SLJIT_INLINE void compile_recurse(compiler_common *common)
+ DEFINE_COMPILER;
+ pcre_uchar *cc = common->start + common->currententry->start;
+ pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
+-pcre_uchar *ccend = bracketend(cc);
++pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
+ BOOL needs_control_head;
+ int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
+ int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
+@@ -9648,6 +9689,7 @@ set_jumps(common->currententry->calls, common->currententry->entry);
+ 
+ sljit_emit_fast_enter(compiler, TMP2, 0);
+ allocate_stack(common, private_data_size + framesize + alternativesize);
++count_match(common);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
+ copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
+ if (needs_control_head)
+@@ -9992,6 +10034,7 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
+ OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
+ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
+ OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
++OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
+ 
+ if (mode == JIT_PARTIAL_SOFT_COMPILE)
+diff --git a/ext/pcre/pcrelib/pcre_study.c b/ext/pcre/pcrelib/pcre_study.c
+index 998fe23..7fd0ba0 100644
+--- a/ext/pcre/pcrelib/pcre_study.c
++++ b/ext/pcre/pcrelib/pcre_study.c
+@@ -71,6 +71,7 @@ rather than bytes.
+   startcode       pointer to start of the whole pattern's code
+   options         the compiling options
+   recurses        chain of recurse_check to catch mutual recursion
++  countptr        pointer to call count (to catch over complexity)
+ 
+ Returns:   the minimum length
+            -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
+@@ -80,7 +81,8 @@ Returns:   the minimum length
+ 
+ static int
+ find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
+-  const pcre_uchar *startcode, int options, recurse_check *recurses)
++  const pcre_uchar *startcode, int options, recurse_check *recurses,
++  int *countptr)
+ {
+ int length = -1;
+ /* PCRE_UTF16 has the same value as PCRE_UTF8. */
+@@ -90,6 +92,8 @@ recurse_check this_recurse;
+ register int branchlength = 0;
+ register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
+ 
++if ((*countptr)++ > 1000) return -1;   /* too complex */
++
+ if (*code == OP_CBRA || *code == OP_SCBRA ||
+     *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
+ 
+@@ -131,7 +135,7 @@ for (;;)
+     case OP_SBRAPOS:
+     case OP_ONCE:
+     case OP_ONCE_NC:
+-    d = find_minlength(re, cc, startcode, options, recurses);
++    d = find_minlength(re, cc, startcode, options, recurses, countptr);
+     if (d < 0) return d;
+     branchlength += d;
+     do cc += GET(cc, 1); while (*cc == OP_ALT);
+@@ -415,7 +419,8 @@ for (;;)
+             int dd;
+             this_recurse.prev = recurses;
+             this_recurse.group = cs;
+-            dd = find_minlength(re, cs, startcode, options, &this_recurse);
++            dd = find_minlength(re, cs, startcode, options, &this_recurse,
++              countptr);
+             if (dd < d) d = dd;
+             }
+           }
+@@ -451,7 +456,8 @@ for (;;)
+           {
+           this_recurse.prev = recurses;
+           this_recurse.group = cs;
+-          d = find_minlength(re, cs, startcode, options, &this_recurse);
++          d = find_minlength(re, cs, startcode, options, &this_recurse,
++            countptr);
+           }
+         }
+       }
+@@ -514,7 +520,7 @@ for (;;)
+         this_recurse.prev = recurses;
+         this_recurse.group = cs;
+         branchlength += find_minlength(re, cs, startcode, options,
+-          &this_recurse);
++          &this_recurse, countptr);
+         }
+       }
+     cc += 1 + LINK_SIZE;
+@@ -1453,6 +1459,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
+ #endif
+ {
+ int min;
++int count = 0;
+ BOOL bits_set = FALSE;
+ pcre_uint8 start_bits[32];
+ PUBL(extra) *extra = NULL;
+@@ -1539,7 +1546,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
+ 
+ /* Find the minimum length of subject string. */
+ 
+-switch(min = find_minlength(re, code, code, re->options, NULL))
++switch(min = find_minlength(re, code, code, re->options, NULL, &count))
+   {
+   case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
+   case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
+diff --git a/ext/pcre/pcrelib/pcre_xclass.c b/ext/pcre/pcrelib/pcre_xclass.c
+index c2b61f0..ef759a5 100644
+--- a/ext/pcre/pcrelib/pcre_xclass.c
++++ b/ext/pcre/pcrelib/pcre_xclass.c
+@@ -246,7 +246,7 @@ while ((t = *data++) != XCL_END)
+ 
+       case PT_PXPUNCT:
+       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
+-            (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
++            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+         return !negated;
+       break;
+ 
+diff --git a/ext/pcre/pcrelib/sljit/sljitConfig.h b/ext/pcre/pcrelib/sljit/sljitConfig.h
+index 10364c3..1c8a521 100644
+--- a/ext/pcre/pcrelib/sljit/sljitConfig.h
++++ b/ext/pcre/pcrelib/sljit/sljitConfig.h
+@@ -96,6 +96,15 @@
+ #define SLJIT_EXECUTABLE_ALLOCATOR 1
+ #endif
+ 
++/* Force cdecl calling convention even if a better calling
++   convention (e.g. fastcall) is supported by the C compiler.
++   If this option is enabled, C functions without
++   SLJIT_CALL can also be called from JIT code. */
++#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
++/* Disabled by default */
++#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
++#endif
++
+ /* Return with error when an invalid argument is passed. */
+ #ifndef SLJIT_ARGUMENT_CHECKS
+ /* Disabled by default */
+diff --git a/ext/pcre/pcrelib/sljit/sljitConfigInternal.h b/ext/pcre/pcrelib/sljit/sljitConfigInternal.h
+index 3284012..16e3547 100644
+--- a/ext/pcre/pcrelib/sljit/sljitConfigInternal.h
++++ b/ext/pcre/pcrelib/sljit/sljitConfigInternal.h
+@@ -468,7 +468,12 @@ typedef double sljit_d;
+ 
+ #ifndef SLJIT_CALL
+ 
+-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
++#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION)
++
++/* Force cdecl. */
++#define SLJIT_CALL
++
++#elif (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ 
+ #if defined(__GNUC__) && !defined(__APPLE__)
+ 
+@@ -608,6 +613,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+ #define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw))
+ #endif
+ 
++#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
++
++#define SLJIT_NUMBER_OF_REGISTERS 10
++#define SLJIT_NUMBER_OF_SAVED_REGISTERS 5
++#define SLJIT_LOCALS_OFFSET_BASE 0
++
+ #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
+ 
+ #define SLJIT_NUMBER_OF_REGISTERS 0
+diff --git a/ext/pcre/pcrelib/sljit/sljitLir.c b/ext/pcre/pcrelib/sljit/sljitLir.c
+index 5039a7e..0f1b1c9 100644
+--- a/ext/pcre/pcrelib/sljit/sljitLir.c
++++ b/ext/pcre/pcrelib/sljit/sljitLir.c
+@@ -845,8 +845,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
+ 	}
+ 
+ static SLJIT_CONST char* op0_names[] = {
+-	(char*)"breakpoint", (char*)"nop",
+-	(char*)"lumul", (char*)"lsmul", (char*)"ludiv", (char*)"lsdiv",
++	(char*)"breakpoint", (char*)"nop", (char*)"lumul", (char*)"lsmul",
++	(char*)"udivmod", (char*)"sdivmod", (char*)"udivi", (char*)"sdivi"
+ };
+ 
+ static SLJIT_CONST char* op1_names[] = {
+@@ -1036,7 +1036,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler
+ {
+ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ 	CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL)
+-		|| ((op & ~SLJIT_INT_OP) >= SLJIT_LUDIV && (op & ~SLJIT_INT_OP) <= SLJIT_LSDIV));
++		|| ((op & ~SLJIT_INT_OP) >= SLJIT_UDIVMOD && (op & ~SLJIT_INT_OP) <= SLJIT_SDIVI));
+ 	CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2);
+ #endif
+ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+@@ -1447,6 +1447,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
+ 
+ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
+ {
++	SLJIT_UNUSED_ARG(offset);
++
+ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ 	FUNCTION_CHECK_DST(dst, dstw);
+ #endif
+@@ -1462,6 +1464,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co
+ 
+ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
+ {
++	SLJIT_UNUSED_ARG(init_value);
++
+ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+ 	FUNCTION_CHECK_DST(dst, dstw);
+ #endif
+diff --git a/ext/pcre/pcrelib/sljit/sljitLir.h b/ext/pcre/pcrelib/sljit/sljitLir.h
+index 24c0f60..2e2e9ac09 100644
+--- a/ext/pcre/pcrelib/sljit/sljitLir.h
++++ b/ext/pcre/pcrelib/sljit/sljitLir.h
+@@ -687,7 +687,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
+ #define SLJIT_OP0_BASE			0
+ 
+ /* Flags: - (never set any flags)
+-   Note: breakpoint instruction is not supported by all architectures (namely ppc)
++   Note: breakpoint instruction is not supported by all architectures (e.g. ppc)
+          It falls back to SLJIT_NOP in those cases. */
+ #define SLJIT_BREAKPOINT		(SLJIT_OP0_BASE + 0)
+ /* Flags: - (never set any flags)
+@@ -696,24 +696,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
+ #define SLJIT_NOP			(SLJIT_OP0_BASE + 1)
+ /* Flags: - (may destroy flags)
+    Unsigned multiplication of SLJIT_R0 and SLJIT_R1.
+-   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
++   Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
+ #define SLJIT_LUMUL			(SLJIT_OP0_BASE + 2)
+ /* Flags: - (may destroy flags)
+    Signed multiplication of SLJIT_R0 and SLJIT_R1.
+-   Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */
++   Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */
+ #define SLJIT_LSMUL			(SLJIT_OP0_BASE + 3)
+ /* Flags: I - (may destroy flags)
+    Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+-   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+-   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
+-#define SLJIT_LUDIV			(SLJIT_OP0_BASE + 4)
+-#define SLJIT_ILUDIV			(SLJIT_LUDIV | SLJIT_INT_OP)
++   The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
++   Note: if SLJIT_R1 is 0, the behaviour is undefined. */
++#define SLJIT_UDIVMOD			(SLJIT_OP0_BASE + 4)
++#define SLJIT_IUDIVMOD			(SLJIT_UDIVMOD | SLJIT_INT_OP)
+ /* Flags: I - (may destroy flags)
+    Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
+-   The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1.
+-   Note: if SLJIT_R1 contains 0, the behaviour is undefined. */
+-#define SLJIT_LSDIV			(SLJIT_OP0_BASE + 5)
+-#define SLJIT_ILSDIV			(SLJIT_LSDIV | SLJIT_INT_OP)
++   The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
++   Note: if SLJIT_R1 is 0, the behaviour is undefined.
++   Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
++         the behaviour is undefined. */
++#define SLJIT_SDIVMOD			(SLJIT_OP0_BASE + 5)
++#define SLJIT_ISDIVMOD			(SLJIT_SDIVMOD | SLJIT_INT_OP)
++/* Flags: I - (may destroy flags)
++   Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
++   The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
++   Note: if SLJIT_R1 is 0, the behaviour is undefined.
++   Note: SLJIT_UDIVI is single precision divide. */
++#define SLJIT_UDIVI			(SLJIT_OP0_BASE + 6)
++#define SLJIT_IUDIVI			(SLJIT_UDIVI | SLJIT_INT_OP)
++/* Flags: I - (may destroy flags)
++   Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
++   The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
++   Note: if SLJIT_R1 is 0, the behaviour is undefined.
++   Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
++         the behaviour is undefined.
++   Note: SLJIT_SDIVI is single precision divide. */
++#define SLJIT_SDIVI			(SLJIT_OP0_BASE + 7)
++#define SLJIT_ISDIVI			(SLJIT_SDIVI | SLJIT_INT_OP)
+ 
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op);
+ 
+@@ -851,34 +869,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
+ 	sljit_si src1, sljit_sw src1w,
+ 	sljit_si src2, sljit_sw src2w);
+ 
+-/* The following function is a helper function for sljit_emit_op_custom.
+-   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
+-   SLJIT_S and SLJIT_SP registers.
+-
+-   Note: it returns with -1 for virtual registers (only on x86-32). */
+-
+-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
+-
+-/* The following function is a helper function for sljit_emit_op_custom.
+-   It returns with the real machine register index of any SLJIT_FLOAT register.
+-
+-   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
+-
+-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
+-
+-/* Any instruction can be inserted into the instruction stream by
+-   sljit_emit_op_custom. It has a similar purpose as inline assembly.
+-   The size parameter must match to the instruction size of the target
+-   architecture:
+-
+-         x86: 0 < size <= 15. The instruction argument can be byte aligned.
+-      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
+-              if size == 4, the instruction argument must be 4 byte aligned.
+-   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
+-
+-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
+-	void *instruction, sljit_si size);
+-
+ /* Returns with non-zero if fpu is available. */
+ 
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
+@@ -1196,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct
+ 
+ #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
+ 
++/* --------------------------------------------------------------------- */
++/*  CPU specific functions                                               */
++/* --------------------------------------------------------------------- */
++
++/* The following function is a helper function for sljit_emit_op_custom.
++   It returns with the real machine register index ( >=0 ) of any SLJIT_R,
++   SLJIT_S and SLJIT_SP registers.
++
++   Note: it returns with -1 for virtual registers (only on x86-32). */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
++
++/* The following function is a helper function for sljit_emit_op_custom.
++   It returns with the real machine register index of any SLJIT_FLOAT register.
++
++   Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
++
++/* Any instruction can be inserted into the instruction stream by
++   sljit_emit_op_custom. It has a similar purpose as inline assembly.
++   The size parameter must match to the instruction size of the target
++   architecture:
++
++         x86: 0 < size <= 15. The instruction argument can be byte aligned.
++      Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
++              if size == 4, the instruction argument must be 4 byte aligned.
++   Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
++	void *instruction, sljit_si size);
++
++#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
++
++/* Returns with non-zero if sse2 is available. */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
++
++/* Returns with non-zero if cmov instruction is available. */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
++
++/* Emit a conditional mov instruction on x86 CPUs. This instruction
++   moves src to destination, if the condition is satisfied. Unlike
++   other arithmetic instructions, destination must be a register.
++   Before such instructions are emitted, cmov support should be
++   checked by sljit_x86_is_cmov_available function.
++    type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
++    dst_reg must be a valid register and it can be combined
++      with SLJIT_INT_OP to perform 32 bit arithmetic
++   Flags: I - (never set any flags)
++ */
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
++	sljit_si type,
++	sljit_si dst_reg,
++	sljit_si src, sljit_sw srcw);
++
++#endif
++
+ #endif /* _SLJIT_LIR_H_ */
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c
+index aca1d31..5cd4c71 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeARM_32.c
+@@ -1833,18 +1833,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 			| (reg_map[SLJIT_R0] << 8)
+ 			| reg_map[TMP_REG1]);
+ #endif
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
+-		if (compiler->scratches >= 3)
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
++		SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping);
++
++		if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
+ 			FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
++			FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */));
++		}
++		else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
++			FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */));
++
+ #if defined(__GNUC__)
+ 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+-			(op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
++			((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+ #else
+ #error "Software divmod functions are needed"
+ #endif
+-		if (compiler->scratches >= 3)
+-			return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
++
++		if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) {
++			FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */));
++			FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */));
++		}
++		else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3))
++			return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */);
+ 		return SLJIT_SUCCESS;
+ 	}
+ 
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c
+index b66455f..044a675 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeARM_64.c
+@@ -1087,14 +1087,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
+ 			saved_regs_size += sizeof(sljit_sw);
+ 		}
+ 		local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
+-		FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
++		if (saved_regs_size > 0)
++			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
+ 	}
+ 
+ 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
+ 	prev = -1;
+ 	for (i = SLJIT_S0; i >= tmp; i--) {
+ 		if (prev == -1) {
+-			prev = i;
++			if (!(offs & (1 << 15))) {
++				prev = i;
++				continue;
++			}
++			FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
++			offs += 1 << 15;
+ 			continue;
+ 		}
+ 		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+@@ -1104,7 +1110,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
+ 
+ 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+ 		if (prev == -1) {
+-			prev = i;
++			if (!(offs & (1 << 15))) {
++				prev = i;
++				continue;
++			}
++			FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
++			offs += 1 << 15;
+ 			continue;
+ 		}
+ 		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+@@ -1112,8 +1123,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
+ 		prev = -1;
+ 	}
+ 
+-	if (prev != -1)
+-		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
++	SLJIT_ASSERT(prev == -1);
+ 
+ 	if (compiler->local_size > (63 * sizeof(sljit_sw))) {
+ 		/* The local_size is already adjusted by the saved registers. */
+@@ -1188,7 +1198,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
+ 	prev = -1;
+ 	for (i = SLJIT_S0; i >= tmp; i--) {
+ 		if (prev == -1) {
+-			prev = i;
++			if (!(offs & (1 << 15))) {
++				prev = i;
++				continue;
++			}
++			FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
++			offs += 1 << 15;
+ 			continue;
+ 		}
+ 		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+@@ -1198,7 +1213,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
+ 
+ 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
+ 		if (prev == -1) {
+-			prev = i;
++			if (!(offs & (1 << 15))) {
++				prev = i;
++				continue;
++			}
++			FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
++			offs += 1 << 15;
+ 			continue;
+ 		}
+ 		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+@@ -1206,13 +1226,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
+ 		prev = -1;
+ 	}
+ 
+-	if (prev != -1)
+-		FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));
++	SLJIT_ASSERT(prev == -1);
+ 
+ 	if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
+ 		FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
+ 			| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
+-	} else {
++	} else if (saved_regs_size > 0) {
+ 		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
+ 	}
+ 
+@@ -1242,12 +1261,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+ 		FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+ 		return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
+ 		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+-		FAIL_IF(push_inst(compiler, ((op == SLJIT_LUDIV ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
++		FAIL_IF(push_inst(compiler, ((op == SLJIT_UDIVMOD ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
+ 		FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
+ 		return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++		return push_inst(compiler, ((op == SLJIT_UDIVI ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
+ 	}
+ 
+ 	return SLJIT_SUCCESS;
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c b/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c
+index 6e38cec..f9803f5 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeARM_T2_32.c
+@@ -1239,6 +1239,9 @@ extern int __aeabi_idivmod(int numerator, int denominator);
+ 
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
+ {
++	sljit_sw saved_reg_list[3];
++	sljit_sw saved_reg_count;
++
+ 	CHECK_ERROR();
+ 	CHECK(check_sljit_emit_op0(compiler, op));
+ 
+@@ -1255,24 +1258,53 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 			| (reg_map[SLJIT_R0] << 12)
+ 			| (reg_map[SLJIT_R0] << 16)
+ 			| reg_map[SLJIT_R1]);
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
+-		if (compiler->scratches >= 4) {
+-			FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
+-			FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
+-		} else if (compiler->scratches >= 3)
+-			FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */));
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
++		SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping);
++
++		saved_reg_count = 0;
++		if (compiler->scratches >= 4)
++			saved_reg_list[saved_reg_count++] = 12;
++		if (compiler->scratches >= 3)
++			saved_reg_list[saved_reg_count++] = 2;
++		if (op >= SLJIT_UDIVI)
++			saved_reg_list[saved_reg_count++] = 1;
++
++		if (saved_reg_count > 0) {
++			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
++						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
++			if (saved_reg_count >= 2) {
++				SLJIT_ASSERT(saved_reg_list[1] < 8);
++				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
++			}
++			if (saved_reg_count >= 3) {
++				SLJIT_ASSERT(saved_reg_list[2] < 8);
++				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
++			}
++		}
++
+ #if defined(__GNUC__)
+ 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+-			(op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
++			((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+ #else
+ #error "Software divmod functions are needed"
+ #endif
+-		if (compiler->scratches >= 4) {
+-			FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
+-			return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
+-		} else if (compiler->scratches >= 3)
+-			return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */);
++
++		if (saved_reg_count > 0) {
++			if (saved_reg_count >= 3) {
++				SLJIT_ASSERT(saved_reg_list[2] < 8);
++				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
++			}
++			if (saved_reg_count >= 2) {
++				SLJIT_ASSERT(saved_reg_list[1] < 8);
++				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
++			}
++			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
++						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
++		}
+ 		return SLJIT_SUCCESS;
+ 	}
+ 
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
+index 3e2c9f0..cf3535f 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeMIPS_common.c
+@@ -1053,8 +1053,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ #endif
+ 		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+ 		return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
+ #if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+ 		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+ 		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+@@ -1062,15 +1065,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 
+ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ 		if (int_op)
+-			FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
++			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+ 		else
+-			FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
++			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+ #else
+-		FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
++		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
+ #endif
+ 
+ 		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
+-		return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
++		return (op >= SLJIT_UDIVI) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+ 	}
+ 
+ 	return SLJIT_SUCCESS;
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c b/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c
+index 08d5356..b6a043f 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativePPC_common.c
+@@ -1267,22 +1267,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+ 		return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
+ #endif
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
+ 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
+ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+-		if (int_op) {
+-			FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+-			FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+-		} else {
+-			FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
+-			FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+-		}
+-		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
++		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_UDIVMOD ? DIVWU : DIVW) : (op == SLJIT_UDIVMOD ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
++		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
+ #else
+-		FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
++		FAIL_IF(push_inst(compiler, (op == SLJIT_UDIVMOD ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
+ 		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
++#endif
+ 		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
++		return push_inst(compiler, (int_op ? (op == SLJIT_UDIVI ? DIVWU : DIVW) : (op == SLJIT_UDIVI ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
++#else
++		return push_inst(compiler, (op == SLJIT_UDIVI ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
+ #endif
+ 	}
+ 
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
+index 0b1927a..327c426 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeSPARC_common.c
+@@ -777,20 +777,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ #else
+ #error "Implementation required"
+ #endif
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
++		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
+ #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+-		if (op == SLJIT_LUDIV)
++		if ((op | 0x2) == SLJIT_UDIVI)
+ 			FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
+ 		else {
+ 			FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
+ 			FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
+ 		}
+-		FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
+-		FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
++		if (op <= SLJIT_SDIVMOD)
++			FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
++		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
++		if (op >= SLJIT_UDIVI)
++			return SLJIT_SUCCESS;
+ 		FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
+-		FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)));
+-		return SLJIT_SUCCESS;
++		return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1));
+ #else
+ #error "Implementation required"
+ #endif
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c
+index 1d6aa5a..4d40392f 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeTILEGX_64.c
+@@ -35,21 +35,21 @@
+ #define SIMM_16BIT_MIN (-0x8000)
+ #define SIMM_17BIT_MAX (0xffff)
+ #define SIMM_17BIT_MIN (-0x10000)
+-#define SIMM_32BIT_MIN (-0x80000000)
+ #define SIMM_32BIT_MAX (0x7fffffff)
+-#define SIMM_48BIT_MIN (0x800000000000L)
++#define SIMM_32BIT_MIN (-0x7fffffff - 1)
+ #define SIMM_48BIT_MAX (0x7fffffff0000L)
++#define SIMM_48BIT_MIN (-0x800000000000L)
+ #define IMM16(imm) ((imm) & 0xffff)
+ 
+ #define UIMM_16BIT_MAX (0xffff)
+ 
+-#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
+-#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
+-#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+-#define ADDR_TMP (SLJIT_NO_REGISTERS + 4)
++#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
++#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
++#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
++#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5)
+ #define PIC_ADDR_REG TMP_REG2
+ 
+-static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
++static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
+ 	63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
+ };
+ 
+@@ -58,11 +58,6 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+ #define TMP_REG2_mapped 16
+ #define TMP_REG3_mapped 6
+ #define ADDR_TMP_mapped 7
+-#define SLJIT_SAVED_REG1_mapped 30
+-#define SLJIT_SAVED_REG2_mapped 31
+-#define SLJIT_SAVED_REG3_mapped 32
+-#define SLJIT_SAVED_EREG1_mapped 33
+-#define SLJIT_SAVED_EREG2_mapped 34
+ 
+ /* Flags are keept in volatile registers. */
+ #define EQUAL_FLAG 8
+@@ -399,6 +394,9 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+ #define SUB(dst, srca, srcb) \
+ 	push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)
+ 
++#define MUL(dst, srca, srcb) \
++	push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__)
++
+ #define NOR(dst, srca, srcb) \
+ 	push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)
+ 
+@@ -547,8 +545,8 @@ const struct Format* compute_format()
+ 
+ 	const struct Format* match = NULL;
+ 	const struct Format *b = NULL;
+-	unsigned int i = 0;
+-	for (i; i < sizeof formats / sizeof formats[0]; i++) {
++	unsigned int i;
++	for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
+ 		b = &formats[i];
+ 		if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
+ 			match = b;
+@@ -625,7 +623,6 @@ tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
+ 
+ static sljit_si update_buffer(struct sljit_compiler *compiler)
+ {
+-	int count;
+ 	int i;
+ 	int orig_index = inst_buf_index;
+ 	struct jit_instr inst0 = inst_buf[0];
+@@ -738,8 +735,10 @@ static sljit_si update_buffer(struct sljit_compiler *compiler)
+ 
+ static sljit_si flush_buffer(struct sljit_compiler *compiler)
+ {
+-	while (inst_buf_index != 0)
+-		update_buffer(compiler);
++	while (inst_buf_index != 0) {
++		FAIL_IF(update_buffer(compiler));
++	}
++	return SLJIT_SUCCESS;
+ }
+ 
+ static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
+@@ -787,6 +786,7 @@ static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o
+ 	case TILEGX_OPC_ADD:
+ 	case TILEGX_OPC_AND:
+ 	case TILEGX_OPC_SUB:
++	case TILEGX_OPC_MULX:
+ 	case TILEGX_OPC_OR:
+ 	case TILEGX_OPC_XOR:
+ 	case TILEGX_OPC_NOR:
+@@ -905,7 +905,6 @@ static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_
+ 	sljit_sw diff;
+ 	sljit_uw target_addr;
+ 	sljit_ins *inst;
+-	sljit_ins saved_inst;
+ 
+ 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
+ 		return code_ptr;
+@@ -1009,7 +1008,7 @@ SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compi
+ 	struct sljit_const *const_;
+ 
+ 	CHECK_ERROR_PTR();
+-	check_sljit_generate_code(compiler);
++	CHECK_PTR(check_sljit_generate_code(compiler));
+ 	reverse_buf(compiler);
+ 
+ 	code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+@@ -1178,13 +1177,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
+ 	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+ {
+ 	sljit_ins base;
+-	sljit_ins bundle = 0;
+-
++	sljit_si i, tmp;
++ 
+ 	CHECK_ERROR();
+-	check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
++	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+ 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ 
+-	local_size += (saveds + 1) * sizeof(sljit_sw);
++	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+ 	local_size = (local_size + 7) & ~7;
+ 	compiler->local_size = local_size;
+ 
+@@ -1200,56 +1199,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
+ 		local_size = 0;
+ 	}
+ 
++	/* Save the return address. */
+ 	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+ 	FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
+ 
+-	if (saveds >= 1)
+-		FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8));
+-
+-	if (saveds >= 2)
+-		FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8));
+-
+-	if (saveds >= 3)
+-		FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8));
+-
+-	if (saveds >= 4)
+-		FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8));
+-
+-	if (saveds >= 5)
+-		FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8));
+-
+-	if (args >= 1)
+-		FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO));
++	/* Save the S registers. */
++	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
++	for (i = SLJIT_S0; i >= tmp; i--) {
++		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
++	}
+ 
+-	if (args >= 2)
+-		FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO));
++	/* Save the R registers that need to be reserved. */
++	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
++		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
++	}
+ 
+-	if (args >= 3)
+-		FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO));
++	/* Move the arguments to S registers. */
++	for (i = 0; i < args; i++) {
++		FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO));
++	}
+ 
+ 	return SLJIT_SUCCESS;
+ }
+ 
+-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler,
+ 	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
+ 	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
+ {
+-	CHECK_ERROR_VOID();
+-	check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
++	CHECK_ERROR();
++	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
+ 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
+ 
+-	local_size += (saveds + 1) * sizeof(sljit_sw);
++	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
+ 	compiler->local_size = (local_size + 7) & ~7;
++
++	return SLJIT_SUCCESS;
+ }
+ 
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
+ {
+ 	sljit_si local_size;
+ 	sljit_ins base;
+-	int addr_initialized = 0;
++	sljit_si i, tmp;
++	sljit_si saveds;
+ 
+ 	CHECK_ERROR();
+-	check_sljit_emit_return(compiler, op, src, srcw);
++	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+ 
+ 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+ 
+@@ -1263,50 +1258,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
+ 		local_size = 0;
+ 	}
+ 
++	/* Restore the return address. */
+ 	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
+-	FAIL_IF(LD(RA, ADDR_TMP_mapped));
+-
+-	if (compiler->saveds >= 5) {
+-		FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48));
+-		addr_initialized = 1;
++	FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8));
+ 
+-		FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8));
++	/* Restore the S registers. */
++	saveds = compiler->saveds;
++	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
++	for (i = SLJIT_S0; i >= tmp; i--) {
++		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
+ 	}
+ 
+-	if (compiler->saveds >= 4) {
+-		if (addr_initialized == 0) {
+-			FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40));
+-			addr_initialized = 1;
+-		}
+-
+-		FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8));
+-	}
+-
+-	if (compiler->saveds >= 3) {
+-		if (addr_initialized == 0) {
+-			FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32));
+-			addr_initialized = 1;
+-		}
+-
+-		FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8));
+-	}
+-
+-	if (compiler->saveds >= 2) {
+-		if (addr_initialized == 0) {
+-			FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24));
+-			addr_initialized = 1;
+-		}
+-
+-		FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8));
+-	}
+-
+-	if (compiler->saveds >= 1) {
+-		if (addr_initialized == 0) {
+-			FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16));
+-			/* addr_initialized = 1; no need to initialize as it's the last one. */
+-		}
+-
+-		FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8));
++	/* Restore the R registers that need to be reserved. */
++	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
++		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
+ 	}
+ 
+ 	if (compiler->local_size <= SIMM_16BIT_MAX)
+@@ -1585,7 +1550,7 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
+ {
+ 	CHECK_ERROR();
+-	check_sljit_emit_fast_enter(compiler, dst, dstw);
++	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+ 	ADJUST_LOCAL_OFFSET(dst, dstw);
+ 
+ 	/* For UNUSED dst. Uncommon, but possible. */
+@@ -1602,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
+ {
+ 	CHECK_ERROR();
+-	check_sljit_emit_fast_return(compiler, src, srcw);
++	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
+ 	ADJUST_LOCAL_OFFSET(src, srcw);
+ 
+ 	if (FAST_IS_REG(src))
+@@ -1636,9 +1601,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 			if (op == SLJIT_MOV_SI)
+ 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
+ 
+-		return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
+-		} else if (dst != src2)
+-			SLJIT_ASSERT_STOP();
++			return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
++		} else if (dst != src2) {
++			SLJIT_ASSERT(src2 == 0);
++			return ADD(reg_map[dst], reg_map[src2], ZERO);
++		}
+ 
+ 		return SLJIT_SUCCESS;
+ 
+@@ -1650,8 +1617,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
+ 
+ 			return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
+-		} else if (dst != src2)
+-			SLJIT_ASSERT_STOP();
++		} else if (dst != src2) {
++			SLJIT_ASSERT(src2 == 0);
++			return ADD(reg_map[dst], reg_map[src2], ZERO);
++		}
+ 
+ 		return SLJIT_SUCCESS;
+ 
+@@ -1663,8 +1632,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 				return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
+ 
+ 			return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
+-		} else if (dst != src2)
+-			SLJIT_ASSERT_STOP();
++		} else if (dst != src2) {
++			SLJIT_ASSERT(src2 == 0);
++			return ADD(reg_map[dst], reg_map[src2], ZERO);
++		}
+ 
+ 		return SLJIT_SUCCESS;
+ 
+@@ -1811,7 +1782,6 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 				else {
+ 					/* Rare ocasion. */
+ 					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
+-	
+ 					overflow_ra = TMP_EREG2;
+ 				}
+ 			}
+@@ -1903,6 +1873,17 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 
+ 		return SLJIT_SUCCESS;
+ 
++	case SLJIT_MUL:
++		if (flags & SRC2_IMM) {
++			FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2));
++			src2 = TMP_REG2;
++			flags &= ~SRC2_IMM;
++		}
++
++		FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2]));
++
++		return SLJIT_SUCCESS;
++
+ #define EMIT_LOGICAL(op_imm, op_norm) \
+ 	if (flags & SRC2_IMM) { \
+ 		FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
+@@ -1950,8 +1931,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
+ 	} else { \
+ 		if (op & SLJIT_SET_E) \
+ 			FAIL_IF(push_3_buffer( \
+-				compiler, op_imm, reg_map[dst], reg_map[src1], \
+-				src2 & 0x3F, __LINE__)); \
++				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
++				reg_map[src2], __LINE__)); \
+ 		if (CHECK_FLAGS(SLJIT_SET_E)) \
+ 			FAIL_IF(push_3_buffer( \
+ 				compiler, op_norm, reg_map[dst], reg_map[src1], \
+@@ -2105,66 +2086,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
+ {
+ 	sljit_si sugg_dst_ar, dst_ar;
+ 	sljit_si flags = GET_ALL_FLAGS(op);
++	sljit_si mem_type = (op & SLJIT_INT_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
+ 
+ 	CHECK_ERROR();
+-	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
++	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ 	ADJUST_LOCAL_OFFSET(dst, dstw);
+ 
+ 	if (dst == SLJIT_UNUSED)
+ 		return SLJIT_SUCCESS;
+ 
+ 	op = GET_OPCODE(op);
++	if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI)
++		mem_type = INT_DATA | SIGNED_DATA;
+ 	sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];
+ 
+ 	compiler->cache_arg = 0;
+ 	compiler->cache_argw = 0;
+ 	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
+ 		ADJUST_LOCAL_OFFSET(src, srcw);
+-		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
++		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
+ 		src = TMP_REG1;
+ 		srcw = 0;
+ 	}
+ 
+-	switch (type) {
+-	case SLJIT_C_EQUAL:
+-	case SLJIT_C_NOT_EQUAL:
++	switch (type & 0xff) {
++	case SLJIT_EQUAL:
++	case SLJIT_NOT_EQUAL:
+ 		FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
+ 		dst_ar = sugg_dst_ar;
+ 		break;
+-	case SLJIT_C_LESS:
+-	case SLJIT_C_GREATER_EQUAL:
+-	case SLJIT_C_FLOAT_LESS:
+-	case SLJIT_C_FLOAT_GREATER_EQUAL:
++	case SLJIT_LESS:
++	case SLJIT_GREATER_EQUAL:
+ 		dst_ar = ULESS_FLAG;
+ 		break;
+-	case SLJIT_C_GREATER:
+-	case SLJIT_C_LESS_EQUAL:
+-	case SLJIT_C_FLOAT_GREATER:
+-	case SLJIT_C_FLOAT_LESS_EQUAL:
++	case SLJIT_GREATER:
++	case SLJIT_LESS_EQUAL:
+ 		dst_ar = UGREATER_FLAG;
+ 		break;
+-	case SLJIT_C_SIG_LESS:
+-	case SLJIT_C_SIG_GREATER_EQUAL:
++	case SLJIT_SIG_LESS:
++	case SLJIT_SIG_GREATER_EQUAL:
+ 		dst_ar = LESS_FLAG;
+ 		break;
+-	case SLJIT_C_SIG_GREATER:
+-	case SLJIT_C_SIG_LESS_EQUAL:
++	case SLJIT_SIG_GREATER:
++	case SLJIT_SIG_LESS_EQUAL:
+ 		dst_ar = GREATER_FLAG;
+ 		break;
+-	case SLJIT_C_OVERFLOW:
+-	case SLJIT_C_NOT_OVERFLOW:
++	case SLJIT_OVERFLOW:
++	case SLJIT_NOT_OVERFLOW:
+ 		dst_ar = OVERFLOW_FLAG;
+ 		break;
+-	case SLJIT_C_MUL_OVERFLOW:
+-	case SLJIT_C_MUL_NOT_OVERFLOW:
++	case SLJIT_MUL_OVERFLOW:
++	case SLJIT_MUL_NOT_OVERFLOW:
+ 		FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
+ 		dst_ar = sugg_dst_ar;
+ 		type ^= 0x1; /* Flip type bit for the XORI below. */
+ 		break;
+-	case SLJIT_C_FLOAT_EQUAL:
+-	case SLJIT_C_FLOAT_NOT_EQUAL:
+-		dst_ar = EQUAL_FLAG;
+-		break;
+ 
+ 	default:
+ 		SLJIT_ASSERT_STOP();
+@@ -2180,11 +2156,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
+ 	if (op >= SLJIT_ADD) {
+ 		if (TMP_REG2_mapped != dst_ar)
+ 			FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
+-		return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
++		return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+ 	}
+ 
+ 	if (dst & SLJIT_MEM)
+-		return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw);
++		return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
+ 
+ 	if (sugg_dst_ar != dst_ar)
+ 		return ADD(sugg_dst_ar, dst_ar, ZERO);
+@@ -2194,7 +2170,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
+ 
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) {
+ 	CHECK_ERROR();
+-	check_sljit_emit_op0(compiler, op);
++	CHECK(check_sljit_emit_op0(compiler, op));
+ 
+ 	op = GET_OPCODE(op);
+ 	switch (op) {
+@@ -2204,10 +2180,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 	case SLJIT_BREAKPOINT:
+ 		return PI(BPT);
+ 
+-	case SLJIT_UMUL:
+-	case SLJIT_SMUL:
+-	case SLJIT_UDIV:
+-	case SLJIT_SDIV:
++	case SLJIT_LUMUL:
++	case SLJIT_LSMUL:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
+ 		SLJIT_ASSERT_STOP();
+ 	}
+ 
+@@ -2217,7 +2193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw)
+ {
+ 	CHECK_ERROR();
+-	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
++	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+ 	ADJUST_LOCAL_OFFSET(dst, dstw);
+ 	ADJUST_LOCAL_OFFSET(src, srcw);
+ 
+@@ -2273,7 +2249,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
+ 		return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
+ 
+ 	case SLJIT_CLZ:
+-		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
++		return emit_op(compiler, op, (op & SLJIT_INT_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+ 	}
+ 
+ 	return SLJIT_SUCCESS;
+@@ -2282,7 +2258,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler
+ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w)
+ {
+ 	CHECK_ERROR();
+-	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
++	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+ 	ADJUST_LOCAL_OFFSET(dst, dstw);
+ 	ADJUST_LOCAL_OFFSET(src1, src1w);
+ 	ADJUST_LOCAL_OFFSET(src2, src2w);
+@@ -2325,7 +2301,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_comp
+ 	flush_buffer(compiler);
+ 
+ 	CHECK_ERROR_PTR();
+-	check_sljit_emit_label(compiler);
++	CHECK_PTR(check_sljit_emit_label(compiler));
+ 
+ 	if (compiler->last_label && compiler->last_label->size == compiler->size)
+ 		return compiler->last_label;
+@@ -2344,7 +2320,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil
+ 	flush_buffer(compiler);
+ 
+ 	CHECK_ERROR();
+-	check_sljit_emit_ijump(compiler, type, src, srcw);
++	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+ 	ADJUST_LOCAL_OFFSET(src, srcw);
+ 
+ 	if (FAST_IS_REG(src)) {
+@@ -2404,8 +2380,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil
+ 
+ 		return SLJIT_SUCCESS;
+ 
+-	} else if (src & SLJIT_MEM)
++	} else if (src & SLJIT_MEM) {
+ 		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
++		flush_buffer(compiler);
++	}
+ 
+ 	FAIL_IF(JR_SOLO(reg_map[src_r]));
+ 
+@@ -2432,7 +2410,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil
+ 	flush_buffer(compiler);
+ 
+ 	CHECK_ERROR_PTR();
+-	check_sljit_emit_jump(compiler, type);
++	CHECK_PTR(check_sljit_emit_jump(compiler, type));
+ 
+ 	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
+ 	PTR_FAIL_IF(!jump);
+@@ -2440,48 +2418,42 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil
+ 	type &= 0xff;
+ 
+ 	switch (type) {
+-	case SLJIT_C_EQUAL:
+-	case SLJIT_C_FLOAT_NOT_EQUAL:
++	case SLJIT_EQUAL:
+ 		BR_NZ(EQUAL_FLAG);
+ 		break;
+-	case SLJIT_C_NOT_EQUAL:
+-	case SLJIT_C_FLOAT_EQUAL:
++	case SLJIT_NOT_EQUAL:
+ 		BR_Z(EQUAL_FLAG);
+ 		break;
+-	case SLJIT_C_LESS:
+-	case SLJIT_C_FLOAT_LESS:
++	case SLJIT_LESS:
+ 		BR_Z(ULESS_FLAG);
+ 		break;
+-	case SLJIT_C_GREATER_EQUAL:
+-	case SLJIT_C_FLOAT_GREATER_EQUAL:
++	case SLJIT_GREATER_EQUAL:
+ 		BR_NZ(ULESS_FLAG);
+ 		break;
+-	case SLJIT_C_GREATER:
+-	case SLJIT_C_FLOAT_GREATER:
++	case SLJIT_GREATER:
+ 		BR_Z(UGREATER_FLAG);
+ 		break;
+-	case SLJIT_C_LESS_EQUAL:
+-	case SLJIT_C_FLOAT_LESS_EQUAL:
++	case SLJIT_LESS_EQUAL:
+ 		BR_NZ(UGREATER_FLAG);
+ 		break;
+-	case SLJIT_C_SIG_LESS:
++	case SLJIT_SIG_LESS:
+ 		BR_Z(LESS_FLAG);
+ 		break;
+-	case SLJIT_C_SIG_GREATER_EQUAL:
++	case SLJIT_SIG_GREATER_EQUAL:
+ 		BR_NZ(LESS_FLAG);
+ 		break;
+-	case SLJIT_C_SIG_GREATER:
++	case SLJIT_SIG_GREATER:
+ 		BR_Z(GREATER_FLAG);
+ 		break;
+-	case SLJIT_C_SIG_LESS_EQUAL:
++	case SLJIT_SIG_LESS_EQUAL:
+ 		BR_NZ(GREATER_FLAG);
+ 		break;
+-	case SLJIT_C_OVERFLOW:
+-	case SLJIT_C_MUL_OVERFLOW:
++	case SLJIT_OVERFLOW:
++	case SLJIT_MUL_OVERFLOW:
+ 		BR_Z(OVERFLOW_FLAG);
+ 		break;
+-	case SLJIT_C_NOT_OVERFLOW:
+-	case SLJIT_C_MUL_NOT_OVERFLOW:
++	case SLJIT_NOT_OVERFLOW:
++	case SLJIT_MUL_NOT_OVERFLOW:
+ 		BR_NZ(OVERFLOW_FLAG);
+ 		break;
+ 	default:
+@@ -2536,7 +2508,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_comp
+ 	flush_buffer(compiler);
+ 
+ 	CHECK_ERROR_PTR();
+-	check_sljit_emit_const(compiler, dst, dstw, init_value);
++	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+ 	ADJUST_LOCAL_OFFSET(dst, dstw);
+ 
+ 	const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
+@@ -2572,3 +2544,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
+ 	inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
+ 	SLJIT_CACHE_FLUSH(inst, inst + 4);
+ }
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
++{
++	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
++	return reg_map[reg];
++}
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
++	void *instruction, sljit_si size)
++{
++	CHECK_ERROR();
++	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
++	return SLJIT_ERR_UNSUPPORTED;
++}
++
+diff --git a/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c b/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c
+index 22a163f..416c15a 100644
+--- a/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c
++++ b/ext/pcre/pcrelib/sljit/sljitNativeX86_common.c
+@@ -273,7 +273,9 @@ static sljit_si cpu_has_sse2 = -1;
+ #endif
+ static sljit_si cpu_has_cmov = -1;
+ 
+-#if defined(_MSC_VER) && _MSC_VER >= 1400
++#ifdef _WIN32_WCE
++#include <cmnintrin.h>
++#elif defined(_MSC_VER) && _MSC_VER >= 1400
+ #include <intrin.h>
+ #endif
+ 
+@@ -742,8 +744,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		break;
+ 	case SLJIT_LUMUL:
+ 	case SLJIT_LSMUL:
+-	case SLJIT_LUDIV:
+-	case SLJIT_LSDIV:
++	case SLJIT_UDIVMOD:
++	case SLJIT_SDIVMOD:
++	case SLJIT_UDIVI:
++	case SLJIT_SDIVI:
+ 		compiler->flags_saved = 0;
+ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ #ifdef _WIN64
+@@ -761,9 +765,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ #endif
+ 		compiler->mode32 = op & SLJIT_INT_OP;
+ #endif
++		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);
+ 
+ 		op = GET_OPCODE(op);
+-		if (op == SLJIT_LUDIV) {
++		if ((op | 0x2) == SLJIT_UDIVI) {
+ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+ 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+ 			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
+@@ -774,7 +779,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 			*inst = XOR_r_rm;
+ 		}
+ 
+-		if (op == SLJIT_LSDIV) {
++		if ((op | 0x2) == SLJIT_SDIVI) {
+ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+ 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
+ #endif
+@@ -805,10 +810,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		FAIL_IF(!inst);
+ 		INC_SIZE(2);
+ 		*inst++ = GROUP_F7;
+-		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
++		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
+ #else
+ #ifdef _WIN64
+-		size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2;
++		size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
+ #else
+ 		size = (!compiler->mode32) ? 3 : 2;
+ #endif
+@@ -817,11 +822,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		INC_SIZE(size);
+ #ifdef _WIN64
+ 		if (!compiler->mode32)
+-			*inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0);
+-		else if (op >= SLJIT_LUDIV)
++			*inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
++		else if (op >= SLJIT_UDIVMOD)
+ 			*inst++ = REX_B;
+ 		*inst++ = GROUP_F7;
+-		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
++		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
+ #else
+ 		if (!compiler->mode32)
+ 			*inst++ = REX_W;
+@@ -836,15 +841,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
+ 		case SLJIT_LSMUL:
+ 			*inst |= IMUL;
+ 			break;
+-		case SLJIT_LUDIV:
++		case SLJIT_UDIVMOD:
++		case SLJIT_UDIVI:
+ 			*inst |= DIV;
+ 			break;
+-		case SLJIT_LSDIV:
++		case SLJIT_SDIVMOD:
++		case SLJIT_SDIVI:
+ 			*inst |= IDIV;
+ 			break;
+ 		}
+ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
+-		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
++		if (op <= SLJIT_SDIVMOD)
++			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
++#else
++		if (op >= SLJIT_UDIVI)
++			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
+ #endif
+ 		break;
+ 	}
+@@ -1905,60 +1916,62 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler,
+ 		return SLJIT_SUCCESS;
+ 	}
+ 
+-	if (FAST_IS_REG(src1)) {
++	if (!(src1 & SLJIT_IMM)) {
+ 		if (src2 & SLJIT_IMM) {
+ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ 			if (IS_HALFWORD(src2w) || compiler->mode32) {
+-				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
++				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
+ 				FAIL_IF(!inst);
+ 				*inst = GROUP_F7;
+ 			}
+ 			else {
+ 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
+-				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
++				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
+ 				FAIL_IF(!inst);
+ 				*inst = TEST_rm_r;
+ 			}
+ #else
+-			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
++			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
+ 			FAIL_IF(!inst);
+ 			*inst = GROUP_F7;
+ #endif
++			return SLJIT_SUCCESS;
+ 		}
+-		else {
++		else if (FAST_IS_REG(src1)) {
+ 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+ 			FAIL_IF(!inst);
+ 			*inst = TEST_rm_r;
++			return SLJIT_SUCCESS;
+ 		}
+-		return SLJIT_SUCCESS;
+ 	}
+ 
+-	if (FAST_IS_REG(src2)) {
++	if (!(src2 & SLJIT_IMM)) {
+ 		if (src1 & SLJIT_IMM) {
+ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ 			if (IS_HALFWORD(src1w) || compiler->mode32) {
+-				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
++				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
+ 				FAIL_IF(!inst);
+ 				*inst = GROUP_F7;
+ 			}
+ 			else {
+ 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
+-				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
++				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
+ 				FAIL_IF(!inst);
+ 				*inst = TEST_rm_r;
+ 			}
+ #else
+-			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
++			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
+ 			FAIL_IF(!inst);
+ 			*inst = GROUP_F7;
+ #endif
++			return SLJIT_SUCCESS;
+ 		}
+-		else {
++		else if (FAST_IS_REG(src2)) {
+ 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+ 			FAIL_IF(!inst);
+ 			*inst = TEST_rm_r;
++			return SLJIT_SUCCESS;
+ 		}
+-		return SLJIT_SUCCESS;
+ 	}
+ 
+ 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+@@ -2923,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
+ {
+ 	*(sljit_sw*)addr = new_constant;
+ }
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
++{
++#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
++	if (cpu_has_sse2 == -1)
++		get_cpu_features();
++	return cpu_has_sse2;
++#else
++	return 1;
++#endif
++}
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
++{
++	if (cpu_has_cmov == -1)
++		get_cpu_features();
++	return cpu_has_cmov;
++}
++
++SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
++	sljit_si type,
++	sljit_si dst_reg,
++	sljit_si src, sljit_sw srcw)
++{
++	sljit_ub* inst;
++
++	CHECK_ERROR();
++#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
++	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
++	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
++	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
++	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
++	FUNCTION_CHECK_SRC(src, srcw);
++#endif
++#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
++	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
++		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
++			!(dst_reg & SLJIT_INT_OP) ? "" : ".i",
++			JUMP_PREFIX(type), jump_names[type & 0xff]);
++		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
++		fprintf(compiler->verbose, ", ");
++		sljit_verbose_param(compiler, src, srcw);
++		fprintf(compiler->verbose, "\n");
++	}
++#endif
++
++	ADJUST_LOCAL_OFFSET(src, srcw);
++	CHECK_EXTRA_REGS(src, srcw, (void)0);
++
++#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
++	compiler->mode32 = dst_reg & SLJIT_INT_OP;
++#endif
++	dst_reg &= ~SLJIT_INT_OP;
++
++	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
++		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
++		src = TMP_REG1;
++		srcw = 0;
++	}
++
++	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
++	FAIL_IF(!inst);
++	*inst++ = GROUP_0F;
++	*inst = get_jump_code(type & 0xff) - 0x40;
++	return SLJIT_SUCCESS;
++}
-- 
cgit