summaryrefslogtreecommitdiffstats
path: root/icu.9283.regexcmp.crash.patch
diff options
context:
space:
mode:
Diffstat (limited to 'icu.9283.regexcmp.crash.patch')
-rw-r--r--icu.9283.regexcmp.crash.patch36
1 files changed, 36 insertions, 0 deletions
diff --git a/icu.9283.regexcmp.crash.patch b/icu.9283.regexcmp.crash.patch
new file mode 100644
index 0000000..9cf7e3e
--- /dev/null
+++ b/icu.9283.regexcmp.crash.patch
@@ -0,0 +1,36 @@
+--- icu/source/i18n/regexcmp.cpp (revision 31398)
++++ icu/source/i18n/regexcmp.cpp (revision 31782)
+@@ -3307,8 +3307,29 @@
+
+ case URX_STRING_I:
+- // TODO: Is the case-folded string the longest?
+- // If so we can optimize this the same as URX_STRING.
+- loc++;
+- currentLen = INT32_MAX;
++ // TODO: This code assumes that, with case insensitive matching, any user
++ // string that matches will not be longer than our compiled string.
++ // Our compiled string has been case-folded already.
++ //
++ // Any matching user string will have no more code points than our
++ // compiled (folded) string. Folding may add code points, but
++ // not remove them.
++ //
++ // There is a potential problem if a supplementary code point
++ // case-folds to a BMP code point. In this case our compiled string
++ // could be shorter (in code units) than a matching user string.
++ //
++ // At this time (Unicode 6.1) there are no such characters, and this case
++ // is not being handled. A test, intltest regex/Bug9283, will fail if
++ // any problematic characters are added to Unicode.
++ //
++ // If this happens, we can make a set of the BMP chars that the
++ // troublesome supplementary characters fold to, scan our string, and
++ // increase currentLen by one extra for each that is found.
++ //
++ {
++ loc++;
++ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
++ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
++ }
+ break;
+