// luni/src/test/java/libcore/java/nio/charset/CharsetEncoderTest.java - platform/libcore2 - Git at Google

 /*
  * Copyright (C) 2009 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package libcore.java.nio.charset;

 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
 import java.util.Arrays;

 /**
  * Tests {@link CharsetEncoder}: custom and default replacement bytes, surrogate pairs
  * encoded to UTF-32BE in one or in two calls, and calling flush before end of input.
  */
 public class CharsetEncoderTest extends junit.framework.TestCase {
     /**
      * Checks that the replacement installed with replaceWith is what gets written for a
      * character the charset cannot represent.
      *
      * None of the harmony or jtreg tests actually check that replaceWith does the right thing!
      */
     public void test_replaceWith() throws Exception {
         Charset ascii = Charset.forName("US-ASCII");
         CharsetEncoder e = ascii.newEncoder();
         e.onMalformedInput(CodingErrorAction.REPLACE);
         e.onUnmappableCharacter(CodingErrorAction.REPLACE);
         e.replaceWith("=".getBytes("US-ASCII"));
         // U+0666 is outside US-ASCII, so it must come out as the replacement byte.
         String input = "hello\u0666world";
         String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
         assertEquals("hello=world", output);
     }

     /**
      * Asserts that a fresh encoder for {@code charset} reports {@code bytes} as its
      * replacement. The arrays are compared via their string forms so that a failure
      * message shows the full contents of both.
      *
      * @param charset name of the charset to look up
      * @param bytes the expected replacement bytes
      */
     private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
         byte[] result = Charset.forName(charset).newEncoder().replacement();
         assertEquals(Arrays.toString(bytes), Arrays.toString(result));
     }

     /**
      * Asserts that the bytes written to {@code bb} so far (index 0 up to its current
      * position) are exactly {@code expected}. Like assertReplacementBytesForEncoder, this
      * compares string forms so a failure reports every byte instead of only the first
      * mismatch.
      *
      * @param expected the bytes that should have been written
      * @param bb an array-backed buffer that was filled starting at index 0
      */
     private void assertBytesWritten(byte[] expected, ByteBuffer bb) {
         byte[] actual = Arrays.copyOfRange(bb.array(), 0, bb.position());
         assertEquals(Arrays.toString(expected), Arrays.toString(actual));
     }

     // For all the guaranteed built-in charsets, check that we have the right default replacements.
     public void test_defaultReplacementBytesIso_8859_1() throws Exception {
         assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
     }
     public void test_defaultReplacementBytesUs_Ascii() throws Exception {
         assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
     }
     public void test_defaultReplacementBytesUtf_16() throws Exception {
         assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
     }
     public void test_defaultReplacementBytesUtf_16be() throws Exception {
         assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
     }
     public void test_defaultReplacementBytesUtf_16le() throws Exception {
         assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
     }
     public void test_defaultReplacementBytesUtf_8() throws Exception {
         assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
     }

     /** A complete surrogate pair passed in a single call is encoded as one code point. */
     public void testSurrogatePairAllAtOnce() throws Exception {
         // okay: surrogate pair seen all at once is encoded as U+20b9f.
         Charset cs = Charset.forName("UTF-32BE");
         CharsetEncoder e = cs.newEncoder();
         ByteBuffer bb = ByteBuffer.allocate(128);
         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(4, bb.position());
         assertBytesWritten(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);
     }

     /** A low surrogate with no preceding high surrogate is reported as malformed input. */
     public void testMalformedSurrogatePair() throws Exception {
         // malformed: low surrogate first is detected as an error.
         Charset cs = Charset.forName("UTF-32BE");
         CharsetEncoder e = cs.newEncoder();
         ByteBuffer bb = ByteBuffer.allocate(128);
         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
         assertTrue(cr.toString(), cr.isMalformed());
         assertEquals(1, cr.length());
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE);
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT);
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE);
     }

     /**
      * Feeds the two halves of a surrogate pair to a UTF-32BE encoder in separate calls and
      * asserts the outcome described for the RI in the comments below.
      *
      * @param cea the action installed for both malformed input and unmappable characters
      */
     private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception {
         // stupid: on the RI, writing the two halves of the surrogate pair in separate writes
         // is an error because the CharsetEncoder doesn't remember it's half-way through a
         // surrogate pair across the two calls!

         // IGNORE just ignores both characters, REPORT complains that the second is
         // invalid (because it doesn't remember seeing the first), and REPLACE inserts a
         // replacement character U+fffd when it sees the second character (because it too
         // doesn't remember seeing the first).

         Charset cs = Charset.forName("UTF-32BE");
         CharsetEncoder e = cs.newEncoder();
         e.onMalformedInput(cea);
         e.onUnmappableCharacter(cea);
         ByteBuffer bb = ByteBuffer.allocate(128);

         // First half: nothing is written yet.
         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(0, bb.position());

         // Second half, in its own call.
         cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
         if (cea == CodingErrorAction.REPORT) {
             assertTrue(cr.toString(), cr.isMalformed());
             assertEquals(1, cr.length());
             return;
         }
         assertEquals(CoderResult.UNDERFLOW, cr);
         int expectedPosition = 0;
         if (cea == CodingErrorAction.REPLACE) {
             // U+fffd in UTF-32BE.
             expectedPosition = 4;
             assertBytesWritten(new byte[] { 0x00, 0x00, (byte) 0xff, (byte) 0xfd }, bb);
         }
         assertEquals(expectedPosition, bb.position());

         // Neither signalling end of input nor flushing may add any output.
         cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(expectedPosition, bb.position());
         cr = e.flush(bb);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(expectedPosition, bb.position());
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE);
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT);
     }

     public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception {
         testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE);
     }

     /**
      * Feeds the two halves of a surrogate pair to a UTF-32BE encoder in separate calls and
      * asserts that the pair is still encoded as the single code point U+20b9f, whatever
      * error action is installed.
      *
      * @param cea the action installed for both malformed input and unmappable characters
      */
     private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception {
         // Writing the two halves of the surrogate pair in separate writes works just fine.
         // This is true of Android and ICU, but not of the RI.
         Charset cs = Charset.forName("UTF-32BE");
         CharsetEncoder e = cs.newEncoder();
         e.onMalformedInput(cea);
         e.onUnmappableCharacter(cea);
         ByteBuffer bb = ByteBuffer.allocate(128);

         // First half: nothing is written yet.
         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(0, bb.position());

         // Second half completes the pair and produces the four UTF-32BE bytes.
         cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
         assertEquals(CoderResult.UNDERFLOW, cr);
         int expectedPosition = 4;
         assertEquals(expectedPosition, bb.position());
         assertBytesWritten(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);

         // Neither signalling end of input nor flushing may add any output.
         cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(expectedPosition, bb.position());
         cr = e.flush(bb);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(expectedPosition, bb.position());
     }

     /**
      * Checks that flush is rejected until encode has been called with endOfInput true, and
      * that the rejected flush leaves the encoder usable: a later encode and flush produce
      * exactly the expected output.
      */
     public void testFlushWithoutEndOfInput() throws Exception {
         Charset cs = Charset.forName("UTF-32BE");
         CharsetEncoder e = cs.newEncoder();
         ByteBuffer bb = ByteBuffer.allocate(128);
         CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(4, bb.position());

         // Record whether the exception was thrown so that a flush that silently succeeds
         // fails the test instead of passing unnoticed.
         boolean flushRejected = false;
         try {
             e.flush(bb);
         } catch (IllegalStateException expected) {
             // you must call encode with endOfInput true before you can flush.
             flushRejected = true;
         }
         assertTrue("flush() before encode(..., true) must throw IllegalStateException",
                 flushRejected);

         // We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
         // That would result in flush outputting garbage.
         cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(8, bb.position());
         cr = e.flush(bb);
         assertEquals(CoderResult.UNDERFLOW, cr);
         assertEquals(8, bb.position());
     }
 }
	/*
	* Copyright (C) 2009 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package libcore.java.nio.charset;

	import java.nio.ByteBuffer;
	import java.nio.CharBuffer;
	import java.nio.charset.Charset;
	import java.nio.charset.CharsetEncoder;
	import java.nio.charset.CoderResult;
	import java.nio.charset.CodingErrorAction;
	import java.util.Arrays;

	/**
	 * Tests {@link CharsetEncoder}: custom and default replacement bytes, surrogate pairs
	 * encoded to UTF-32BE in one or in two calls, and calling flush before end of input.
	 */
	public class CharsetEncoderTest extends junit.framework.TestCase {
		/**
		 * Checks that the replacement installed with replaceWith is what gets written for a
		 * character the charset cannot represent.
		 *
		 * None of the harmony or jtreg tests actually check that replaceWith does the right thing!
		 */
		public void test_replaceWith() throws Exception {
			Charset ascii = Charset.forName("US-ASCII");
			CharsetEncoder e = ascii.newEncoder();
			e.onMalformedInput(CodingErrorAction.REPLACE);
			e.onUnmappableCharacter(CodingErrorAction.REPLACE);
			e.replaceWith("=".getBytes("US-ASCII"));
			// U+0666 is outside US-ASCII, so it must come out as the replacement byte.
			String input = "hello\u0666world";
			String output = ascii.decode(e.encode(CharBuffer.wrap(input))).toString();
			assertEquals("hello=world", output);
		}

		/**
		 * Asserts that a fresh encoder for {@code charset} reports {@code bytes} as its
		 * replacement. The arrays are compared via their string forms so that a failure
		 * message shows the full contents of both.
		 *
		 * @param charset name of the charset to look up
		 * @param bytes the expected replacement bytes
		 */
		private void assertReplacementBytesForEncoder(String charset, byte[] bytes) {
			byte[] result = Charset.forName(charset).newEncoder().replacement();
			assertEquals(Arrays.toString(bytes), Arrays.toString(result));
		}

		/**
		 * Asserts that the bytes written to {@code bb} so far (index 0 up to its current
		 * position) are exactly {@code expected}. Like assertReplacementBytesForEncoder, this
		 * compares string forms so a failure reports every byte instead of only the first
		 * mismatch.
		 *
		 * @param expected the bytes that should have been written
		 * @param bb an array-backed buffer that was filled starting at index 0
		 */
		private void assertBytesWritten(byte[] expected, ByteBuffer bb) {
			byte[] actual = Arrays.copyOfRange(bb.array(), 0, bb.position());
			assertEquals(Arrays.toString(expected), Arrays.toString(actual));
		}

		// For all the guaranteed built-in charsets, check that we have the right default replacements.
		public void test_defaultReplacementBytesIso_8859_1() throws Exception {
			assertReplacementBytesForEncoder("ISO-8859-1", new byte[] { (byte) '?' });
		}
		public void test_defaultReplacementBytesUs_Ascii() throws Exception {
			assertReplacementBytesForEncoder("US-ASCII", new byte[] { (byte) '?' });
		}
		public void test_defaultReplacementBytesUtf_16() throws Exception {
			assertReplacementBytesForEncoder("UTF-16", new byte[] { (byte) 0xff, (byte) 0xfd });
		}
		public void test_defaultReplacementBytesUtf_16be() throws Exception {
			assertReplacementBytesForEncoder("UTF-16BE", new byte[] { (byte) 0xff, (byte) 0xfd });
		}
		public void test_defaultReplacementBytesUtf_16le() throws Exception {
			assertReplacementBytesForEncoder("UTF-16LE", new byte[] { (byte) 0xfd, (byte) 0xff });
		}
		public void test_defaultReplacementBytesUtf_8() throws Exception {
			assertReplacementBytesForEncoder("UTF-8", new byte[] { (byte) '?' });
		}

		/** A complete surrogate pair passed in a single call is encoded as one code point. */
		public void testSurrogatePairAllAtOnce() throws Exception {
			// okay: surrogate pair seen all at once is encoded as U+20b9f.
			Charset cs = Charset.forName("UTF-32BE");
			CharsetEncoder e = cs.newEncoder();
			ByteBuffer bb = ByteBuffer.allocate(128);
			CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842', '\udf9f' }), bb, false);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(4, bb.position());
			assertBytesWritten(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);
		}

		/** A low surrogate with no preceding high surrogate is reported as malformed input. */
		public void testMalformedSurrogatePair() throws Exception {
			// malformed: low surrogate first is detected as an error.
			Charset cs = Charset.forName("UTF-32BE");
			CharsetEncoder e = cs.newEncoder();
			ByteBuffer bb = ByteBuffer.allocate(128);
			CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
			assertTrue(cr.toString(), cr.isMalformed());
			assertEquals(1, cr.length());
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE_RI() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.IGNORE);
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT_RI() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPORT);
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE_RI() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction.REPLACE);
		}

		/**
		 * Feeds the two halves of a surrogate pair to a UTF-32BE encoder in separate calls and
		 * asserts the outcome described for the RI in the comments below.
		 *
		 * @param cea the action installed for both malformed input and unmappable characters
		 */
		private void testCharsetEncoderSurrogatesBrokenByDesign_RI(CodingErrorAction cea) throws Exception {
			// stupid: on the RI, writing the two halves of the surrogate pair in separate writes
			// is an error because the CharsetEncoder doesn't remember it's half-way through a
			// surrogate pair across the two calls!

			// IGNORE just ignores both characters, REPORT complains that the second is
			// invalid (because it doesn't remember seeing the first), and REPLACE inserts a
			// replacement character U+fffd when it sees the second character (because it too
			// doesn't remember seeing the first).

			Charset cs = Charset.forName("UTF-32BE");
			CharsetEncoder e = cs.newEncoder();
			e.onMalformedInput(cea);
			e.onUnmappableCharacter(cea);
			ByteBuffer bb = ByteBuffer.allocate(128);

			// First half: nothing is written yet.
			CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(0, bb.position());

			// Second half, in its own call.
			cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
			if (cea == CodingErrorAction.REPORT) {
				assertTrue(cr.toString(), cr.isMalformed());
				assertEquals(1, cr.length());
				return;
			}
			assertEquals(CoderResult.UNDERFLOW, cr);
			int expectedPosition = 0;
			if (cea == CodingErrorAction.REPLACE) {
				// U+fffd in UTF-32BE.
				expectedPosition = 4;
				assertBytesWritten(new byte[] { 0x00, 0x00, (byte) 0xff, (byte) 0xfd }, bb);
			}
			assertEquals(expectedPosition, bb.position());

			// Neither signalling end of input nor flushing may add any output.
			cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(expectedPosition, bb.position());
			cr = e.flush(bb);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(expectedPosition, bb.position());
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_IGNORE() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.IGNORE);
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_REPORT() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPORT);
		}

		public void testCharsetEncoderSurrogatesBrokenByDesign_REPLACE() throws Exception {
			testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction.REPLACE);
		}

		/**
		 * Feeds the two halves of a surrogate pair to a UTF-32BE encoder in separate calls and
		 * asserts that the pair is still encoded as the single code point U+20b9f, whatever
		 * error action is installed.
		 *
		 * @param cea the action installed for both malformed input and unmappable characters
		 */
		private void testCharsetEncoderSurrogatesBrokenByDesign(CodingErrorAction cea) throws Exception {
			// Writing the two halves of the surrogate pair in separate writes works just fine.
			// This is true of Android and ICU, but not of the RI.
			Charset cs = Charset.forName("UTF-32BE");
			CharsetEncoder e = cs.newEncoder();
			e.onMalformedInput(cea);
			e.onUnmappableCharacter(cea);
			ByteBuffer bb = ByteBuffer.allocate(128);

			// First half: nothing is written yet.
			CoderResult cr = e.encode(CharBuffer.wrap(new char[] { '\ud842' }), bb, false);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(0, bb.position());

			// Second half completes the pair and produces the four UTF-32BE bytes.
			cr = e.encode(CharBuffer.wrap(new char[] { '\udf9f' }), bb, false);
			assertEquals(CoderResult.UNDERFLOW, cr);
			int expectedPosition = 4;
			assertEquals(expectedPosition, bb.position());
			assertBytesWritten(new byte[] { 0x00, 0x02, 0x0b, (byte) 0x9f }, bb);

			// Neither signalling end of input nor flushing may add any output.
			cr = e.encode(CharBuffer.wrap(new char[] { }), bb, true);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(expectedPosition, bb.position());
			cr = e.flush(bb);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(expectedPosition, bb.position());
		}

		/**
		 * Checks that flush is rejected until encode has been called with endOfInput true, and
		 * that the rejected flush leaves the encoder usable: a later encode and flush produce
		 * exactly the expected output.
		 */
		public void testFlushWithoutEndOfInput() throws Exception {
			Charset cs = Charset.forName("UTF-32BE");
			CharsetEncoder e = cs.newEncoder();
			ByteBuffer bb = ByteBuffer.allocate(128);
			CoderResult cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, false);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(4, bb.position());

			// Record whether the exception was thrown so that a flush that silently succeeds
			// fails the test instead of passing unnoticed.
			boolean flushRejected = false;
			try {
				e.flush(bb);
			} catch (IllegalStateException expected) {
				// you must call encode with endOfInput true before you can flush.
				flushRejected = true;
			}
			assertTrue("flush() before encode(..., true) must throw IllegalStateException",
					flushRejected);

			// We had a bug where we wouldn't reset inEnd before calling encode in implFlush.
			// That would result in flush outputting garbage.
			cr = e.encode(CharBuffer.wrap(new char[] { 'x' }), bb, true);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(8, bb.position());
			cr = e.flush(bb);
			assertEquals(CoderResult.UNDERFLOW, cr);
			assertEquals(8, bb.position());
		}
	}