summaryrefslogtreecommitdiffstats
path: root/icu.icu5483.backport.patch
diff options
context:
space:
mode:
Diffstat (limited to 'icu.icu5483.backport.patch')
-rw-r--r--icu.icu5483.backport.patch874
1 files changed, 874 insertions, 0 deletions
diff --git a/icu.icu5483.backport.patch b/icu.icu5483.backport.patch
new file mode 100644
index 0000000..039dee2
--- /dev/null
+++ b/icu.icu5483.backport.patch
@@ -0,0 +1,874 @@
+diff -ru icu.orig/source/common/ucnv2022.c icu/source/common/ucnv2022.c
+--- icu.orig/source/common/ucnv2022.c 2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv2022.c 2009-06-02 12:30:29.000000000 +0100
+@@ -84,6 +84,26 @@
+ #define V_TAB 0x0B
+ #define SPACE 0x20
+
++enum {
++ HWKANA_START=0xff61,
++ HWKANA_END=0xff9f
++};
++
++/*
++ * 94-character sets with native byte values A1..FE are encoded in ISO 2022
++ * as bytes 21..7E. (Subtract 0x80.)
++ * 96-character sets with native byte values A0..FF are encoded in ISO 2022
++ * as bytes 20..7F. (Subtract 0x80.)
++ * Do not encode C1 control codes with native bytes 80..9F
++ * as bytes 00..1F (C0 control codes).
++ */
++enum {
++ GR94_START=0xa1,
++ GR94_END=0xfe,
++ GR96_START=0xa0,
++ GR96_END=0xff
++};
++
+ /*
+ * ISO 2022 control codes must not be converted from Unicode
+ * because they would mess up the byte stream.
+@@ -981,22 +1001,27 @@
+
+
+ /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
+- * any future change in _MBCSFromUChar32() function should be reflected in
+- * this macro
++ * any future change in _MBCSFromUChar32() function should be reflected here.
++ * @return number of bytes in *value; negative number if fallback; 0 if no mapping
+ */
+-static U_INLINE void
++static U_INLINE int32_t
+ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
+ UChar32 c,
+ uint32_t* value,
+ UBool useFallback,
+- int32_t *length,
+ int outputType)
+ {
+ const int32_t *cx;
+ const uint16_t *table;
+ uint32_t stage2Entry;
+ uint32_t myValue;
++ int32_t length;
+ const uint8_t *p;
++ /*
++ * TODO(markus): Use and require new, faster MBCS conversion table structures.
++ * Use internal version of ucnv_open() that verifies that the new structures are available,
++ * else U_INTERNAL_PROGRAM_ERROR.
++ */
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+ table=sharedData->mbcs.fromUnicodeTable;
+@@ -1005,51 +1030,60 @@
+ if(outputType==MBCS_OUTPUT_2){
+ myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ if(myValue<=0xff) {
+- *length=1;
++ length=1;
+ } else {
+- *length=2;
++ length=2;
+ }
+ } else /* outputType==MBCS_OUTPUT_3 */ {
+ p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
+ myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
+ if(myValue<=0xff) {
+- *length=1;
++ length=1;
+ } else if(myValue<=0xffff) {
+- *length=2;
++ length=2;
+ } else {
+- *length=3;
++ length=3;
+ }
+ }
++ /*
++ * TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
++ * Pass in parameter for type of output bytes, for validation and shifting:
++ * - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
++ * (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
++ * - A1-FE: Subtract 80 after range check.
++ * - SJIS: Shift DBCS result to 21-7E x 21-7E.
++ */
+ /* is this code point assigned, or do we use fallbacks? */
+- if( (stage2Entry&(1<<(16+(c&0xf))))!=0 ||
+- (FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0)
+- ) {
++ if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
++ /* assigned */
++ *value=myValue;
++ return length;
++ } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
+ /*
+ * We allow a 0 byte output if the "assigned" bit is set for this entry.
+ * There is no way with this data structure for fallback output
+ * to be a zero byte.
+ */
+- /* assigned */
+ *value=myValue;
+- return;
++ return -length;
+ }
+ }
+
+ cx=sharedData->mbcs.extIndexes;
+ if(cx!=NULL) {
+- *length=ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
+- return;
++ return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
+ }
+
+ /* unassigned */
+- *length=0;
++ return 0;
+ }
+
+ /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
+- * any future change in _MBCSSingleFromUChar32() function should be reflected in
+- * this macro
++ * any future change in _MBCSSingleFromUChar32() function should be reflected here.
++ * @param retval pointer to output byte
++ * @return 1 roundtrip byte 0 no mapping -1 fallback byte
+ */
+-static U_INLINE void
++static U_INLINE int32_t
+ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
+ UChar32 c,
+ uint32_t* retval,
+@@ -1059,20 +1093,21 @@
+ int32_t value;
+ /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
+ if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
+- *retval=(uint16_t)-1;
+- return;
++ return 0;
+ }
+ /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
+ table=sharedData->mbcs.fromUnicodeTable;
+ /* get the byte for the output */
+ value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
+ /* is this code point assigned, or do we use fallbacks? */
+- if(useFallback ? value>=0x800 : value>=0xc00) {
+- value &=0xff;
++ *retval=(uint32_t)(value&0xff);
++ if(value>=0xf00) {
++ return 1; /* roundtrip */
++ } else if(useFallback ? value>=0x800 : value>=0xc00) {
++ return -1; /* fallback taken */
+ } else {
+- value= -1;
++ return 0; /* no mapping */
+ }
+- *retval=(uint16_t) value;
+ }
+
+ #ifdef U_ENABLE_GENERIC_ISO_2022
+@@ -1316,6 +1351,7 @@
+
+ static void
+ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
++ UConverter *cnv = args->converter;
+ UConverterDataISO2022 *converterData;
+ ISO2022State *pFromU2022State;
+ uint8_t *target = (uint8_t *) args->target;
+@@ -1335,14 +1371,13 @@
+ int8_t cs, g;
+
+ /* set up the state */
+- converterData = (UConverterDataISO2022*)args->converter->extraInfo;
++ converterData = (UConverterDataISO2022*)cnv->extraInfo;
+ pFromU2022State = &converterData->fromU2022State;
+- useFallback = args->converter->useFallback;
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate*/
+- if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
++ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+ goto getTrail;
+ }
+
+@@ -1361,26 +1396,26 @@
+ if(UTF_IS_SECOND_SURROGATE(trail)) {
+ source++;
+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+- args->converter->fromUChar32=0x00;
++ cnv->fromUChar32=0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+@@ -1389,7 +1424,7 @@
+ if(IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+@@ -1407,9 +1442,10 @@
+
+ /* JIS7/8: try single-byte half-width Katakana before JISX208 */
+ if(converterData->version == 3 || converterData->version == 4) {
+- choices[choiceCount++] = cs = (int8_t)HWKANA_7BIT;
+- csm &= ~CSM(cs);
++ choices[choiceCount++] = (int8_t)HWKANA_7BIT;
+ }
++ /* Do not try single-byte half-width Katakana for other versions. */
++ csm &= ~CSM(HWKANA_7BIT);
+
+ /* try the current G0 charset */
+ choices[choiceCount++] = cs = pFromU2022State->cs[0];
+@@ -1432,86 +1468,134 @@
+ }
+
+ cs = g = 0;
++ /*
++ * len==0: no mapping found yet
++ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
++ * len>0: found a roundtrip result, done
++ */
+ len = 0;
++ /*
++ * We will turn off useFallback after finding a fallback,
++ * but we still get fallbacks from PUA code points as usual.
++ * Therefore, we will also need to check that we don't overwrite
++ * an early fallback with a later one.
++ */
++ useFallback = cnv->useFallback;
+
+- for(i = 0; i < choiceCount && len == 0; ++i) {
+- cs = choices[i];
+- switch(cs) {
++ for(i = 0; i < choiceCount && len <= 0; ++i) {
++ uint32_t value;
++ int32_t len2;
++ int8_t cs0 = choices[i];
++ switch(cs0) {
+ case ASCII:
+ if(sourceChar <= 0x7f) {
+ targetValue = (uint32_t)sourceChar;
+ len = 1;
++ cs = cs0;
++ g = 0;
+ }
+ break;
+ case ISO8859_1:
+- if(0x80 <= sourceChar && sourceChar <= 0xff) {
++ if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
+ targetValue = (uint32_t)sourceChar - 0x80;
+ len = 1;
++ cs = cs0;
+ g = 2;
+ }
+ break;
+ case HWKANA_7BIT:
+- if((uint32_t)(0xff9f-sourceChar)<=(0xff9f-0xff61)) {
+- targetValue = (uint32_t)(sourceChar - (0xff61 - 0x21));
+- len = 1;
+-
++ if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
+ if(converterData->version==3) {
+ /* JIS7: use G1 (SO) */
+- pFromU2022State->cs[1] = cs; /* do not output an escape sequence */
++ /* Shift U+FF61..U+FF9F to bytes 21..5F. */
++ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));
++ len = 1;
++ pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
+ g = 1;
+ } else if(converterData->version==4) {
+ /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
+- int8_t cs0;
+-
+- targetValue += 0x80;
++ /* Shift U+FF61..U+FF9F to bytes A1..DF. */
++ targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));
++ len = 1;
+
+- cs0 = pFromU2022State->cs[0];
+- if(IS_JP_DBCS(cs0)) {
++ cs = pFromU2022State->cs[0];
++ if(IS_JP_DBCS(cs)) {
+ /* switch from a DBCS charset to JISX201 */
+ cs = (int8_t)JISX201;
+- } else {
+- /* stay in the current G0 charset */
+- cs = cs0;
+ }
++ /* else stay in the current G0 charset */
++ g = 0;
+ }
++ /* else do not use HWKANA_7BIT with other versions */
+ }
+ break;
+ case JISX201:
+ /* G0 SBCS */
+- MBCS_SINGLE_FROM_UCHAR32(
+- converterData->myConverterArray[cs],
+- sourceChar, &targetValue,
+- useFallback);
+- if(targetValue <= 0x7f) {
+- len = 1;
++ len2 = MBCS_SINGLE_FROM_UCHAR32(
++ converterData->myConverterArray[cs0],
++ sourceChar, &value,
++ useFallback);
++ if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
++ targetValue = value;
++ len = len2;
++ cs = cs0;
++ g = 0;
++ useFallback = FALSE;
+ }
+ break;
+ case ISO8859_7:
+ /* G0 SBCS forced to 7-bit output */
+- MBCS_SINGLE_FROM_UCHAR32(
+- converterData->myConverterArray[cs],
+- sourceChar, &targetValue,
+- useFallback);
+- if(0x80 <= targetValue && targetValue <= 0xff) {
+- targetValue -= 0x80;
+- len = 1;
++ len2 = MBCS_SINGLE_FROM_UCHAR32(
++ converterData->myConverterArray[cs0],
++ sourceChar, &value,
++ useFallback);
++ if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
++ targetValue = value - 0x80;
++ len = len2;
++ cs = cs0;
+ g = 2;
++ useFallback = FALSE;
+ }
+ break;
+ default:
+ /* G0 DBCS */
+- MBCS_FROM_UCHAR32_ISO2022(
+- converterData->myConverterArray[cs],
+- sourceChar, &targetValue,
+- useFallback, &len, MBCS_OUTPUT_2);
+- if(len != 2) {
+- len = 0;
++ len2 = MBCS_FROM_UCHAR32_ISO2022(
++ converterData->myConverterArray[cs0],
++ sourceChar, &value,
++ useFallback, MBCS_OUTPUT_2);
++ if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
++ if(cs0 == KSC5601) {
++ /*
++ * Check for valid bytes for the encoding scheme.
++ * This is necessary because the sub-converter (windows-949)
++ * has a broader encoding scheme than is valid for 2022.
++ *
++ * Check that the result is a 2-byte value with each byte in the range A1..FE
++ * (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
++ * to move it to the ISO 2022 range 21..7E.
++ */
++ if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
++ (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
++ ) {
++ value -= 0x8080; /* shift down to 21..7e byte range */
++ } else {
++ break; /* not valid for ISO 2022 */
++ }
++ }
++ targetValue = value;
++ len = len2;
++ cs = cs0;
++ g = 0;
++ useFallback = FALSE;
+ }
+ break;
+ }
+ }
+
+- if(len > 0) {
++ if(len != 0) {
++ if(len < 0) {
++ len = -len; /* fallback */
++ }
+ outLen = 0; /* count output bytes */
+
+ /* write SI if necessary (only for JIS7) */
+@@ -1560,7 +1644,7 @@
+ * then this is an error
+ */
+ *err = U_INVALID_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+@@ -1586,7 +1670,7 @@
+ }
+ } else {
+ fromUWriteUInt8(
+- args->converter,
++ cnv,
+ buffer, outLen,
+ &target, (const char *)targetLimit,
+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+@@ -1615,7 +1699,7 @@
+ */
+ if( U_SUCCESS(*err) &&
+ (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
+- args->flush && source>=sourceLimit && args->converter->fromUChar32==0
++ args->flush && source>=sourceLimit && cnv->fromUChar32==0
+ ) {
+ int32_t sourceIndex;
+
+@@ -1654,7 +1738,7 @@
+ }
+
+ fromUWriteUInt8(
+- args->converter,
++ cnv,
+ buffer, outLen,
+ &target, (const char *)targetLimit,
+ &offsets, sourceIndex,
+@@ -1777,7 +1861,7 @@
+ !IS_JP_DBCS(cs)
+ ) {
+ /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
+- targetUniChar = mySourceChar + (0xff61 - 0xa1);
++ targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
+
+ /* return from a single-shift state to the previous one */
+ if(pToU2022State->g >= 2) {
+@@ -1818,7 +1902,7 @@
+ case HWKANA_7BIT:
+ if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {
+ /* 7-bit halfwidth Katakana */
+- targetUniChar = mySourceChar + (0xff61 - 0x21);
++ targetUniChar = mySourceChar + (HWKANA_START - 0x21);
+ }
+ break;
+ default:
+@@ -1965,9 +2049,10 @@
+ break;
+ }
+
+- /* length= ucnv_MBCSFromUChar32(converterData->currentConverter->sharedData,
+- sourceChar,&targetByteUnit,args->converter->useFallback);*/
+- MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,&length,MBCS_OUTPUT_2);
++ length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
++ if(length < 0) {
++ length = -length; /* fallback */
++ }
+ /* only DBCS or SBCS characters are expected*/
+ /* DB characters with high bit set to 1 are expected */
+ if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
+@@ -2449,7 +2534,7 @@
+
+ static void
+ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
+-
++ UConverter *cnv = args->converter;
+ UConverterDataISO2022 *converterData;
+ ISO2022State *pFromU2022State;
+ uint8_t *target = (uint8_t *) args->target;
+@@ -2466,14 +2551,13 @@
+ UBool useFallback;
+
+ /* set up the state */
+- converterData = (UConverterDataISO2022*)args->converter->extraInfo;
++ converterData = (UConverterDataISO2022*)cnv->extraInfo;
+ pFromU2022State = &converterData->fromU2022State;
+- useFallback = args->converter->useFallback;
+
+ choiceCount = 0;
+
+ /* check if the last codepoint of previous buffer was a lead surrogate*/
+- if((sourceChar = args->converter->fromUChar32)!=0 && target< targetLimit) {
++ if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
+ goto getTrail;
+ }
+
+@@ -2492,26 +2576,26 @@
+ if(UTF_IS_SECOND_SURROGATE(trail)) {
+ source++;
+ sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);
+- args->converter->fromUChar32=0x00;
++ cnv->fromUChar32=0x00;
+ /* convert this supplementary code point */
+ /* exit this condition tree */
+ } else {
+ /* this is an unmatched lead code unit (1st surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* no more input */
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ } else {
+ /* this is an unmatched trail code unit (2nd surrogate) */
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+@@ -2522,7 +2606,7 @@
+ if(IS_2022_CONTROL(sourceChar)) {
+ /* callback(illegal) */
+ *err=U_ILLEGAL_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+
+@@ -2545,7 +2629,6 @@
+ }
+ else{
+ /* convert U+0080..U+10ffff */
+- UConverterSharedData *cnv;
+ int32_t i;
+ int8_t cs, g;
+
+@@ -2593,17 +2676,41 @@
+ }
+
+ cs = g = 0;
++ /*
++ * len==0: no mapping found yet
++ * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
++ * len>0: found a roundtrip result, done
++ */
+ len = 0;
++ /*
++ * We will turn off useFallback after finding a fallback,
++ * but we still get fallbacks from PUA code points as usual.
++ * Therefore, we will also need to check that we don't overwrite
++ * an early fallback with a later one.
++ */
++ useFallback = cnv->useFallback;
+
+- for(i = 0; i < choiceCount && len == 0; ++i) {
+- cs = choices[i];
+- if(cs > 0) {
+- if(cs > CNS_11643_0) {
+- cnv = converterData->myConverterArray[CNS_11643];
+- MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_3);
+- if(len==3) {
+- cs = (int8_t)(CNS_11643_0 + (targetValue >> 16) - 0x80);
+- len = 2;
++ for(i = 0; i < choiceCount && len <= 0; ++i) {
++ int8_t cs0 = choices[i];
++ if(cs0 > 0) {
++ uint32_t value;
++ int32_t len2;
++ if(cs0 > CNS_11643_0) {
++ len2 = MBCS_FROM_UCHAR32_ISO2022(
++ converterData->myConverterArray[CNS_11643],
++ sourceChar,
++ &value,
++ useFallback,
++ MBCS_OUTPUT_3);
++ if(len2 == 3 || (len2 == -3 && len == 0)) {
++ targetValue = value;
++ cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);
++ if(len2 >= 0) {
++ len = 2;
++ } else {
++ len = -2;
++ useFallback = FALSE;
++ }
+ if(cs == CNS_11643_1) {
+ g = 1;
+ } else if(cs == CNS_11643_2) {
+@@ -2617,15 +2724,25 @@
+ }
+ } else {
+ /* GB2312_1 or ISO-IR-165 */
+- cnv = converterData->myConverterArray[cs];
+- MBCS_FROM_UCHAR32_ISO2022(cnv,sourceChar,&targetValue,useFallback,&len,MBCS_OUTPUT_2);
+- g = 1; /* used if len == 2 */
++ len2 = MBCS_FROM_UCHAR32_ISO2022(
++ converterData->myConverterArray[cs0],
++ sourceChar,
++ &value,
++ useFallback,
++ MBCS_OUTPUT_2);
++ if(len2 == 2 || (len2 == -2 && len == 0)) {
++ targetValue = value;
++ len = len2;
++ cs = cs0;
++ g = 1;
++ useFallback = FALSE;
++ }
+ }
+ }
+ }
+
+- if(len > 0) {
+- len = 0; /* count output bytes; it must have been len == 2 */
++ if(len != 0) {
++ len = 0; /* count output bytes; it must have been abs(len) == 2 */
+
+ /* write the designation sequence if necessary */
+ if(cs != pFromU2022State->cs[g]) {
+@@ -2670,7 +2787,7 @@
+ * then this is an error
+ */
+ *err = U_INVALID_CHAR_FOUND;
+- args->converter->fromUChar32=sourceChar;
++ cnv->fromUChar32=sourceChar;
+ break;
+ }
+ }
+@@ -2691,7 +2808,7 @@
+ }
+ } else {
+ fromUWriteUInt8(
+- args->converter,
++ cnv,
+ buffer, len,
+ &target, (const char *)targetLimit,
+ &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),
+@@ -2720,7 +2837,7 @@
+ */
+ if( U_SUCCESS(*err) &&
+ pFromU2022State->g!=0 &&
+- args->flush && source>=sourceLimit && args->converter->fromUChar32==0
++ args->flush && source>=sourceLimit && cnv->fromUChar32==0
+ ) {
+ int32_t sourceIndex;
+
+@@ -2748,7 +2865,7 @@
+ }
+
+ fromUWriteUInt8(
+- args->converter,
++ cnv,
+ SHIFT_IN_STR, 1,
+ &target, (const char *)targetLimit,
+ &offsets, sourceIndex,
+@@ -3146,7 +3263,7 @@
+ }
+ if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
+ /* include half-width Katakana for JP */
+- sa->addRange(sa->set, 0xff61, 0xff9f);
++ sa->addRange(sa->set, HWKANA_START, HWKANA_END);
+ }
+ break;
+ case 'c':
+diff -ru icu.orig/source/common/ucnv_ext.c icu/source/common/ucnv_ext.c
+--- icu.orig/source/common/ucnv_ext.c 2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv_ext.c 2009-06-02 12:14:20.000000000 +0100
+@@ -551,6 +551,12 @@
+ return 0;
+ }
+
++ /*
++ * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
++ * Do not interpret values with reserved bits used, for forward compatibility,
++ * and do not even remember intermediate results with reserved bits used.
++ */
++
+ if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
+ /* partial match, enter the loop below */
+ index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+@@ -575,7 +581,8 @@
+ value=*fromUSectionValues++;
+ if( value!=0 &&
+ (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+- FROM_U_USE_FALLBACK(useFallback, firstCP))
++ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+ ) {
+ /* remember longest match so far */
+ matchValue=value;
+@@ -613,8 +620,9 @@
+ /* partial match, continue */
+ index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+ } else {
+- if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+- FROM_U_USE_FALLBACK(useFallback, firstCP)
++ if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
++ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+ ) {
+ /* full match, stop with result */
+ matchValue=value;
+@@ -632,8 +640,9 @@
+ return 0;
+ }
+ } else /* result from firstCP trie lookup */ {
+- if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+- FROM_U_USE_FALLBACK(useFallback, firstCP)
++ if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
++ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
++ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
+ ) {
+ /* full match, stop with result */
+ matchValue=value;
+@@ -644,20 +653,18 @@
+ }
+ }
+
+- if(matchValue&UCNV_EXT_FROM_U_RESERVED_MASK) {
+- /* do not interpret values with reserved bits used, for forward compatibility */
+- return 0;
+- }
+-
+ /* return result */
+ if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
+ return 1; /* assert matchLength==2 */
+ }
+
+- *pMatchValue=UCNV_EXT_FROM_U_MASK_ROUNDTRIP(matchValue);
++ *pMatchValue=matchValue;
+ return matchLength;
+ }
+
++/*
++ * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
++ */
+ static U_INLINE void
+ ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
+ uint32_t value,
+@@ -792,6 +799,10 @@
+ }
+ }
+
++/*
++ * Used by ISO 2022 implementation.
++ * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
++ */
+ U_CFUNC int32_t
+ ucnv_extSimpleMatchFromU(const int32_t *cx,
+ UChar32 cp, uint32_t *pValue,
+@@ -809,13 +820,15 @@
+ if(match>=2) {
+ /* write result for simple, single-character conversion */
+ int32_t length;
+-
++ int isRoundtrip;
++
++ isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
+ length=UCNV_EXT_FROM_U_GET_LENGTH(value);
+ value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
+
+ if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
+ *pValue=value;
+- return length;
++ return isRoundtrip ? length : -length;
+ #if 0 /* not currently used */
+ } else if(length==4) {
+ /* de-serialize a 4-byte result */
+@@ -825,7 +838,7 @@
+ ((uint32_t)result[1]<<16)|
+ ((uint32_t)result[2]<<8)|
+ result[3];
+- return 4;
++ return isRoundtrip ? 4 : -4;
+ #endif
+ }
+ }
+diff -ru icu.orig/source/common/ucnv_ext.h icu/source/common/ucnv_ext.h
+--- icu.orig/source/common/ucnv_ext.h 2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnv_ext.h 2009-06-02 12:14:20.000000000 +0100
+@@ -452,7 +452,7 @@
+ #define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
+ #define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
+
+-/* use after masking off the roundtrip flag */
++/* get length; masks away all other bits */
+ #define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
+
+ /* get bytes or bytes index */
+diff -ru icu.orig/source/common/ucnvmbcs.c icu/source/common/ucnvmbcs.c
+--- icu.orig/source/common/ucnvmbcs.c 2009-06-02 11:48:38.000000000 +0100
++++ icu/source/common/ucnvmbcs.c 2009-06-02 12:14:20.000000000 +0100
+@@ -3785,7 +3785,8 @@
+
+ cx=sharedData->mbcs.extIndexes;
+ if(cx!=NULL) {
+- return ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
++ length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
++ return length>=0 ? length : -length; /* return abs(length); */
+ }
+
+ /* unassigned */
+diff -ru icu.orig/source/test/testdata/conversion.txt icu/source/test/testdata/conversion.txt
+--- icu.orig/source/test/testdata/conversion.txt 2009-06-02 11:48:26.000000000 +0100
++++ icu/source/test/testdata/conversion.txt 2009-06-02 12:14:20.000000000 +0100
+@@ -495,6 +495,46 @@
+ }
+ { "UTF-16BE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
+ { "UTF-16BE", :bin{ d800dc }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ d800dc } }
++ // Verify that mappings that would result in byte values outside 20..7F (for SBCS)
++ // or 21..7E (for DBCS) are not used.
++ // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
++ // <U009F> \x9F |0 (also in ISO 8859-1)
++ // <U0387> \xB7 |1
++ // windows-949-2000 (KSC_5601, <ESC>$(C=1b242843):
++ // <UC829> \xA0\xA1 |0
++ // <UD4FE> \xC0\x41 |0
++ // <UD79D> \xC8\xFE |0
++ {
++ "JIS8", // =ISO_2022,locale=ja,version=4
++ "\u009f\u0387\uc829\ud4fe\ud79d",
++ :bin{ 1a1b2e461b4e371a1a1b242843487e1b2842 },
++ :intvector{ 0,1,1,1,1,1,1,2,3,4,4,4,4,4,4,4,4,4 },
++ :int{1}, :int{1}, "", "?", ""
++ }
++ // Ticket 5483: ISO 2022 converter incorrectly using fallback mapping
++ // Verify that a roundtrip mapping is used even when a fallback mapping is
++ // available in the current state.
++ // U+FF61 is handled in code
++ // jisx-208.ucm (<ESC>$B=1b2442):
++ // <U30FE> \x21\x34 |0
++ // <UFF5D> \x21\x51 |0 and
++ // ibm-897_P100-1995.ucm (JIS X 0201, <ESC>(J=1b284a):
++ // <UFF5D> \x7D |1
++ // ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
++ // <U03D5> \xF6 |1
++ // <U2015> \xAF |0
++ // <UFF5D> \x7D |1 (not legal for ISO 2022)
++ // windows-949-2000 (KSC_5601, <ESC>$(C=1b242843):
++ // <UAC00> \xB0\xA1 |0
++ // <UFF5D> \xA3\xFD |0
++ // <U223C> \xA1\xAD |0 (in extension table)
++ {
++ "JIS8", // =ISO_2022,locale=ja,version=4
++ "a\uff61\u03d5\uff5d\uac00\u223c\uff5d\u30fe\uff5d", // Make it switch to ISO-8859-7, KSC 5601 and JIS X 0208.
++ :bin{ 61a11b2e461b4e761b244221511b2428433021212d237d1b2442213421511b2842 },
++ :intvector{ 0,1,2,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,4,5,5,6,6,7,7,7,7,7,8,8,8,8,8 },
++ :int{1}, :int{1}, "", "?", ""
++ }
+
+ // e4b8 is a partial sequence
+ { "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c", :intvector{ 0, 1 }, :int{1}, :int{0}, "truncated", ".", :bin{ e4b8 } }