Skip to content

Commit

Permalink
fix the surrogate utf8 feature when custom characterEscapes is used (#…
Browse files Browse the repository at this point in the history
…1399)

Co-authored-by: stack_underflow <[email protected]>
  • Loading branch information
stackunderflow111 and stack_underflow authored Feb 5, 2025
1 parent 0aa97a7 commit 671f8ed
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 0 deletions.
6 changes: 6 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,9 @@ Justin Gosselin (@jgosselin-accesso)
* Reported #1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(2.18.2)

Haruki (@stackunderflow111)
* Reported #1398: feature `COMBINE_UNICODE_SURROGATES_IN_UTF8` doesn't work
when custom `CharacterEscapes` are used
(2.18.3)
4 changes: 4 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ a pure JSON library.
(reported by @Rodenstock)
(fix contributed by @pjfanning)

#1398: Fix issue where feature `COMBINE_UNICODE_SURROGATES_IN_UTF8` doesn't work
when custom `CharacterEscapes` are used
(reported and fixed by @stackunderflow111)

2.18.2 (27-Nov-2024)

#1359: Non-surrogate characters being incorrectly combined when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1732,6 +1732,16 @@ private final void _writeCustomStringSegment2(final char[] cbuf, int offset, fin
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = cbuf[offset++];
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
continue;
}
}
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
Expand Down Expand Up @@ -1789,6 +1799,16 @@ private final void _writeCustomStringSegment2(final String text, int offset, fin
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
char lowSurrogate = text.charAt(offset++);
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
continue;
}
}
outputPtr = _outputMultiByteChar(ch, outputPtr);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,4 +123,21 @@ void checkNonSurrogates() throws Exception {
assertTrue(json.contains("foo\u3042bar"));
assertTrue(json.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
}

/**
 * Regression test for issue #1398: writes a single string field whose value is a
 * code point outside the BMP, using a factory that has both
 * {@code COMBINE_UNICODE_SURROGATES_IN_UTF8} enabled and custom character escapes
 * installed, and checks that the UTF-8 decoded output contains the character
 * unchanged.
 */
@Test
void checkSurrogateWithCharacterEscapes() throws Exception {
    // Code point 0x1F60A (an emoji) is outside the BMP, so as a Java String
    // it is a high/low surrogate pair
    final String emoji = new String(Character.toChars(0x1F60A));

    final JsonFactory factory = JsonFactory.builder()
            .enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
            .build();
    // Custom escapes are installed after the factory is built
    factory.setCharacterEscapes(JsonpCharacterEscapes.instance());

    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (JsonGenerator g = factory.createGenerator(bytes)) {
        g.writeStartObject();
        g.writeStringField("test_emoji", emoji);
        g.writeEndObject();
    }

    // Decoding the written bytes as UTF-8 must give back the original pair
    final String expected = "{\"test_emoji\":\"\uD83D\uDE0A\"}";
    final String actual = bytes.toString("UTF-8");
    assertEquals(expected, actual);
}
}

0 comments on commit 671f8ed

Please sign in to comment.