/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package libcore.java.nio.charset; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CodingErrorAction; |
| import java.util.Arrays; |
| |
| public class CharsetEncoderTest extends junit.framework.TestCase { |
| // None of the harmony or jtreg tests actually check that replaceWith does the right thing! |
| public void test_replaceWith() throws Exception { |
| Charset ascii = Charset.forName("US-ASCII"); |
| CharsetEncoder e = ascii.newEncoder(); |
| e.onMalformedInput(CodingErrorAction.REPLACE); |
| e.onUnmappableCharacter(CodingErrorAction.REPLACE); |
| e.replaceWith("=".getBytes("US-ASCII")); |
| String input = "hello\u0666world"; |
| String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString(); |
| assertEquals("hello=world", output); |
| } |
| |
| private void assertReplacementBytesForEncoder(String charset, byte[] bytes) { |
| byte[] result = Charset.forName(charset).newEncoder().replacement(); |
| assertEquals(Arrays.toString(bytes), Arrays.toString(result)); |
| } |
| |
| // For all the guaranteed built-in charsets, check that we have the right default replacements. |
| public void test_defaultReplacementBytesIso_8859_1() throws Exception { |
| assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' }); |
| } |
| public void test_defaultReplacementBytesUs_Ascii() throws Exception { |
| assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' }); |
| } |
| public void test_defaultReplacementBytesUtf_16() throws Exception { |
| assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd }); |
| } |
| public void test_defaultReplacementBytesUtf_16be() throws Exception { |
| assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd }); |
| } |
| public void test_defaultReplacementBytesUtf_16le() throws Exception { |
| assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff }); |
| } |
| public void test_defaultReplacementBytesUtf_8() throws Exception { |
| assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' }); |
| } |
| |
| public void testSurrogatePairAllAtOnce() throws Exception { |
| // okay: surrogate pair seen all at once is decoded to U+20b9f. |
| Charset cs = Charset.forName("UTF-32BE"); |
| CharsetEncoder e = cs.newEncoder(); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(4, bb.position()); |
| assertEquals((byte) 0x00, bb.get(0)); |
| assertEquals((byte) 0x02, bb.get(1)); |
| assertEquals((byte) 0x0b, bb.get(2)); |
| assertEquals((byte) 0x9f, bb.get(3)); |
| } |
| |
| public void testMalformedSurrogatePair() throws Exception { |
| // malformed: low surrogate first is detected as an error. |
| Charset cs = Charset.forName("UTF-32BE"); |
| CharsetEncoder e = cs.newEncoder(); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); |
| assertTrue(cr.toString(), cr.isMalformed()); |
| assertEquals(1, cr.length()); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE); |
| } |
| |
| private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception { |
| // stupid: on the RI, writing the two halves of the surrogate pair in separate writes |
| // is an error because the CharsetEncoder doesn't remember it's half-way through a |
| // surrogate pair across the two calls! |
| |
| // IGNORE just ignores both characters, REPORT complains that the second is |
| // invalid (because it doesn't remember seeing the first), and REPLACE inserts a |
| // replacement character U+fffd when it sees the second character (because it too |
| // doesn't remember seeing the first). |
| |
| Charset cs = Charset.forName("UTF-32BE"); |
| CharsetEncoder e = cs.newEncoder(); |
| e.onMalformedInput(cea); |
| e.onUnmappableCharacter(cea); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(0, bb.position()); |
| cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); |
| if (cea == CodingErrorAction.REPORT) { |
| assertTrue(cr.toString(), cr.isMalformed()); |
| assertEquals(1, cr.length()); |
| return; |
| } |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| int expectedPosition = 0; |
| if (cea == CodingErrorAction.REPLACE) { |
| expectedPosition = 4; |
| assertEquals(expectedPosition, bb.position()); |
| System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position()))); |
| assertEquals((byte) 0x00, bb.get(0)); |
| assertEquals((byte) 0x00, bb.get(1)); |
| assertEquals((byte) 0xff, bb.get(2)); |
| assertEquals((byte) 0xfd, bb.get(3)); |
| } |
| assertEquals(expectedPosition, bb.position()); |
| cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(expectedPosition, bb.position()); |
| cr = e.flush(bb); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(expectedPosition, bb.position()); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT); |
| } |
| |
| public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception { |
| testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE); |
| } |
| |
| private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception { |
| // Writing the two halves of the surrogate pair in separate writes works just fine. |
| // This is true of Android and ICU, but not of the RI. |
| Charset cs = Charset.forName("UTF-32BE"); |
| CharsetEncoder e = cs.newEncoder(); |
| e.onMalformedInput(cea); |
| e.onUnmappableCharacter(cea); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(0, bb.position()); |
| cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| int expectedPosition = 4; |
| assertEquals(expectedPosition, bb.position()); |
| System.err.println(Arrays.toString(Arrays.copyOfRange(bb.array(), 0, bb.position()))); |
| assertEquals((byte) 0x00, bb.get(0)); |
| assertEquals((byte) 0x02, bb.get(1)); |
| assertEquals((byte) 0x0b, bb.get(2)); |
| assertEquals((byte) 0x9f, bb.get(3)); |
| cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(expectedPosition, bb.position()); |
| cr = e.flush(bb); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(expectedPosition, bb.position()); |
| } |
| |
| public void testFlushWithoutEndOfInput() throws Exception { |
| Charset cs = Charset.forName("UTF-32BE"); |
| CharsetEncoder e = cs.newEncoder(); |
| ByteBuffer bb = ByteBuffer.allocate(128); |
| CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(4, bb.position()); |
| try { |
| cr = e.flush(bb); |
| } catch (IllegalStateException expected) { |
| // you must call encode with endOfInput true before you can flush. |
| } |
| |
| // We had a bug where we wouldn't reset inEnd before calling encode in implFlush. |
| // That would result in flush outputting garbage. |
| cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(8, bb.position()); |
| cr = e.flush(bb); |
| assertEquals(CoderResult.UNDERFLOW, cr); |
| assertEquals(8, bb.position()); |
| } |
| } |