blob: d733a9fb74892d396b0f9e3e8ea62141d0463b38 [file] [log] [blame]
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package libcore.java.nio.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Arrays;
/**
 * Tests for {@link CharsetEncoder}: replacement bytes, surrogate pair handling in UTF-32BE,
 * and the interaction between {@code encode} and {@code flush}.
 *
 * <p>The {@code ..._RI} tests and their non-{@code _RI} counterparts assert mutually exclusive
 * behaviors for surrogate pairs split across two {@code encode} calls, so on any one
 * implementation only one of the two families is expected to pass.
 */
public class CharsetEncoderTest extends junit.framework.TestCase {
    /**
     * Checks that a replacement installed with {@code replaceWith} is actually used for
     * unmappable input.
     */
    // None of the harmony or jtreg tests actually check that replaceWith does the right thing!
    public void test_replaceWith() throws Exception {
        Charset ascii = Charset.forName("US-ASCII");
        CharsetEncoder e = ascii.newEncoder();
        e.onMalformedInput(CodingErrorAction.REPLACE);
        e.onUnmappableCharacter(CodingErrorAction.REPLACE);
        e.replaceWith("=".getBytes("US-ASCII"));
        // U+0666 has no US-ASCII mapping, so it must come out as the replacement '='.
        String input = "hello\u0666world";
        String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
        assertEquals("hello=world", output);
    }

    /**
     * Asserts that a fresh encoder for {@code charset} reports {@code bytes} as its default
     * replacement.
     *
     * @param charset name of the charset whose encoder is inspected
     * @param bytes the expected replacement byte sequence
     */
    private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
        byte[] result = Charset.forName(charset).newEncoder().replacement();
        // Compare string forms so that a failure message shows both arrays in full.
        assertEquals(Arrays.toString(bytes), Arrays.toString(result));
    }

    /**
     * Asserts that the bytes written to {@code bb} so far (indexes 0 up to, but not including,
     * its current position) are exactly {@code expected}. The buffer's position is not changed.
     *
     * @param bb the output buffer that the encoder wrote into
     * @param expected the bytes that should have been written
     */
    private static void assertEncodedBytes(ByteBuffer bb, byte... expected) {
        byte[] actual = new byte[bb.position()];
        for (int i = 0; i < actual.length; ++i) {
            // Absolute get: reads without disturbing the buffer's position.
            actual[i] = bb.get(i);
        }
        // Comparing string forms checks length and content, and reports every byte on failure.
        assertEquals(Arrays.toString(expected), Arrays.toString(actual));
    }

    // For all the guaranteed built-in charsets, check that we have the right default replacements.

    /** The default ISO-8859-1 replacement is a single '?'. */
    public void test_defaultReplacementBytesIso_8859_1() throws Exception {
        assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
    }

    /** The default US-ASCII replacement is a single '?'. */
    public void test_defaultReplacementBytesUs_Ascii() throws Exception {
        assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
    }

    /** The default UTF-16 replacement is U+fffd in big-endian byte order. */
    public void test_defaultReplacementBytesUtf_16() throws Exception {
        assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
    }

    /** The default UTF-16BE replacement is U+fffd in big-endian byte order. */
    public void test_defaultReplacementBytesUtf_16be() throws Exception {
        assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
    }

    /** The default UTF-16LE replacement is U+fffd in little-endian byte order. */
    public void test_defaultReplacementBytesUtf_16le() throws Exception {
        assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
    }

    /** The default UTF-8 replacement is a single '?'. */
    public void test_defaultReplacementBytesUtf_8() throws Exception {
        assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
    }

    /** A complete surrogate pair supplied in a single call encodes to one UTF-32BE unit. */
    public void testSurrogatePairAllAtOnce() throws Exception {
        // okay: surrogate pair seen all at once is encoded as U+20b9f.
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEncodedBytes(bb, (byte) 0x00, (byte) 0x02, (byte) 0x0b, (byte) 0x9f);
    }

    /** A low surrogate with no preceding high surrogate is reported as malformed input. */
    public void testMalformedSurrogatePair() throws Exception {
        // malformed: low surrogate first is detected as an error.
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
        assertTrue(cr.toString(), cr.isMalformed());
        assertEquals(1, cr.length());
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE);
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT);
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE);
    }

    /**
     * Asserts the RI's behavior when the two halves of a surrogate pair arrive in separate
     * {@code encode} calls.
     *
     * @param cea the action installed for both malformed input and unmappable characters
     */
    private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception {
        // On the RI, writing the two halves of the surrogate pair in separate writes
        // is an error because the CharsetEncoder doesn't remember it's half-way through a
        // surrogate pair across the two calls!
        // IGNORE just ignores both characters, REPORT complains that the second is
        // invalid (because it doesn't remember seeing the first), and REPLACE inserts a
        // replacement character U+fffd when it sees the second character (because it too
        // doesn't remember seeing the first).
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        e.onMalformedInput(cea);
        e.onUnmappableCharacter(cea);
        ByteBuffer bb = ByteBuffer.allocate(128);

        // First half: nothing can be written yet.
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(0, bb.position());

        // Second half: treated as a lone low surrogate.
        cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
        if (cea == CodingErrorAction.REPORT) {
            assertTrue(cr.toString(), cr.isMalformed());
            assertEquals(1, cr.length());
            return;
        }
        assertEquals(CoderResult.UNDERFLOW, cr);

        int expectedPosition = 0;
        if (cea == CodingErrorAction.REPLACE) {
            // U+fffd as a single UTF-32BE unit.
            expectedPosition = 4;
            assertEncodedBytes(bb, (byte) 0x00, (byte) 0x00, (byte) 0xff, (byte) 0xfd);
        }
        assertEquals(expectedPosition, bb.position());

        // Neither signalling end of input nor flushing may produce further output.
        cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE);
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT);
    }

    public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception {
        testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE);
    }

    /**
     * Asserts that a surrogate pair split across two {@code encode} calls is still encoded as
     * one code point, whatever error action is installed.
     *
     * @param cea the action installed for both malformed input and unmappable characters
     */
    private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception {
        // Writing the two halves of the surrogate pair in separate writes works just fine.
        // This is true of Android and ICU, but not of the RI.
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        e.onMalformedInput(cea);
        e.onUnmappableCharacter(cea);
        ByteBuffer bb = ByteBuffer.allocate(128);

        // First half: nothing can be written yet.
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(0, bb.position());

        // Second half completes the pair, which is written as U+20b9f.
        cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        int expectedPosition = 4;
        assertEncodedBytes(bb, (byte) 0x00, (byte) 0x02, (byte) 0x0b, (byte) 0x9f);

        // Neither signalling end of input nor flushing may produce further output.
        cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(expectedPosition, bb.position());
    }

    /**
     * Checks that {@code flush} is rejected before end of input has been signalled, and that
     * the rejected call does not corrupt the output of the encode and flush that follow.
     */
    public void testFlushWithoutEndOfInput() throws Exception {
        Charset cs = Charset.forName("UTF-32BE");
        CharsetEncoder e = cs.newEncoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(4, bb.position());
        try {
            e.flush(bb);
            // Without this, the test would silently pass if flush were wrongly accepted.
            fail("flush before encode(..., endOfInput=true) should throw IllegalStateException");
        } catch (IllegalStateException expected) {
            // you must call encode with endOfInput true before you can flush.
        }
        // We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
        // That would result in flush outputting garbage.
        cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(8, bb.position());
        cr = e.flush(bb);
        assertEquals(CoderResult.UNDERFLOW, cr);
        assertEquals(8, bb.position());
    }
}