| // |
| // ======================================================================== |
| // Copyright (c) 1995-2014 Mort Bay Consulting Pty. Ltd. |
| // ------------------------------------------------------------------------ |
| // All rights reserved. This program and the accompanying materials |
| // are made available under the terms of the Eclipse Public License v1.0 |
| // and Apache License v2.0 which accompanies this distribution. |
| // |
| // The Eclipse Public License is available at |
| // http://www.eclipse.org/legal/epl-v10.html |
| // |
| // The Apache License v2.0 is available at |
| // http://www.opensource.org/licenses/apache2.0.php |
| // |
| // You may elect to redistribute this code under either of these licenses. |
| // ======================================================================== |
| // |
| |
| package org.eclipse.jetty.util; |
| |
| import java.io.IOException; |
| |
| import org.eclipse.jetty.util.log.Log; |
| import org.eclipse.jetty.util.log.Logger; |
| |
| /* ------------------------------------------------------------ */ |
| /** |
| * Utf8 Appendable abstract base class |
| * |
| * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters. |
| * |
| * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before state a character is appended to the string buffer. |
| * |
| * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by |
| * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
| * |
| * License information for Bjoern Hoehrmann's code: |
| * |
| * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> |
| * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
| * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
| * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| **/ |
| public abstract class Utf8Appendable |
| { |
| protected static final Logger LOG = Log.getLogger(Utf8Appendable.class); |
| public static final char REPLACEMENT = '\ufffd'; |
| private static final int UTF8_ACCEPT = 0; |
| private static final int UTF8_REJECT = 12; |
| |
| protected final Appendable _appendable; |
| protected int _state = UTF8_ACCEPT; |
| |
| private static final byte[] BYTE_TABLE = |
| { |
| // The first part of the table maps bytes to character classes that |
| // to reduce the size of the transition table and create bitmasks. |
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
| 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, |
| 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, |
| 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8 |
| }; |
| |
| private static final byte[] TRANS_TABLE = |
| { |
| // The second part is a transition table that maps a combination |
| // of a state of the automaton and a character class to a state. |
| 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, |
| 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, |
| 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, |
| 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, |
| 12,36,12,12,12,12,12,12,12,12,12,12 |
| }; |
| |
| private int _codep; |
| |
| public Utf8Appendable(Appendable appendable) |
| { |
| _appendable = appendable; |
| } |
| |
| public abstract int length(); |
| |
| protected void reset() |
| { |
| _state = UTF8_ACCEPT; |
| } |
| |
| public void append(byte b) |
| { |
| try |
| { |
| appendByte(b); |
| } |
| catch (IOException e) |
| { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| public void append(byte[] b, int offset, int length) |
| { |
| try |
| { |
| int end = offset + length; |
| for (int i = offset; i < end; i++) |
| appendByte(b[i]); |
| } |
| catch (IOException e) |
| { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| public boolean append(byte[] b, int offset, int length, int maxChars) |
| { |
| try |
| { |
| int end = offset + length; |
| for (int i = offset; i < end; i++) |
| { |
| if (length() > maxChars) |
| return false; |
| appendByte(b[i]); |
| } |
| return true; |
| } |
| catch (IOException e) |
| { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| protected void appendByte(byte b) throws IOException |
| { |
| |
| if (b > 0 && _state == UTF8_ACCEPT) |
| { |
| _appendable.append((char)(b & 0xFF)); |
| } |
| else |
| { |
| int i = b & 0xFF; |
| int type = BYTE_TABLE[i]; |
| _codep = _state == UTF8_ACCEPT ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6); |
| int next = TRANS_TABLE[_state + type]; |
| |
| switch(next) |
| { |
| case UTF8_ACCEPT: |
| _state=next; |
| if (_codep < Character.MIN_HIGH_SURROGATE) |
| { |
| _appendable.append((char)_codep); |
| } |
| else |
| { |
| for (char c : Character.toChars(_codep)) |
| _appendable.append(c); |
| } |
| break; |
| |
| case UTF8_REJECT: |
| String reason = "byte "+TypeUtil.toHexString(b)+" in state "+(_state/12); |
| _codep=0; |
| _state = UTF8_ACCEPT; |
| _appendable.append(REPLACEMENT); |
| throw new NotUtf8Exception(reason); |
| |
| default: |
| _state=next; |
| |
| } |
| } |
| } |
| |
| public boolean isUtf8SequenceComplete() |
| { |
| return _state == UTF8_ACCEPT; |
| } |
| |
| public static class NotUtf8Exception extends IllegalArgumentException |
| { |
| public NotUtf8Exception(String reason) |
| { |
| super("Not valid UTF8! "+reason); |
| } |
| } |
| |
| protected void checkState() |
| { |
| if (!isUtf8SequenceComplete()) |
| { |
| _codep=0; |
| _state = UTF8_ACCEPT; |
| try |
| { |
| _appendable.append(REPLACEMENT); |
| } |
| catch(IOException e) |
| { |
| throw new RuntimeException(e); |
| } |
| throw new NotUtf8Exception("incomplete UTF8 sequence"); |
| } |
| } |
| |
| public String toReplacedString() |
| { |
| if (!isUtf8SequenceComplete()) |
| { |
| _codep=0; |
| _state = UTF8_ACCEPT; |
| try |
| { |
| _appendable.append(REPLACEMENT); |
| } |
| catch(IOException e) |
| { |
| throw new RuntimeException(e); |
| } |
| Throwable th= new NotUtf8Exception("incomplete UTF8 sequence"); |
| LOG.warn(th.toString()); |
| LOG.debug(th); |
| } |
| return _appendable.toString(); |
| } |
| } |