| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| /** |
| ******************************************************************************* |
| * Copyright (C) 2005-2016, International Business Machines Corporation and * |
| * others. All Rights Reserved. * |
| ******************************************************************************* |
| */ |
| package android.icu.text; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.Reader; |
| |
| |
| /** |
| * This class represents a charset that has been identified by a CharsetDetector |
| * as a possible encoding for a set of input data. From an instance of this |
| * class, you can ask for a confidence level in the charset identification, |
| * or for Java Reader or String to access the original byte data in Unicode form. |
| * <p> |
| * Instances of this class are created only by CharsetDetectors. |
| * <p> |
| * Note: this class has a natural ordering that is inconsistent with equals. |
| * The natural ordering is based on the match confidence value. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public class CharsetMatch implements Comparable<CharsetMatch> { |
| |
| |
| /** |
| * Create a java.io.Reader for reading the Unicode character data corresponding |
| * to the original byte data supplied to the Charset detect operation. |
| * <p> |
| * CAUTION: if the source of the byte data was an InputStream, a Reader |
| * can be created for only one matching char set using this method. If more |
| * than one charset needs to be tried, the caller will need to reset |
| * the InputStream and create InputStreamReaders itself, based on the charset name. |
| * |
| * @return the Reader for the Unicode character data. |
| */ |
| public Reader getReader() { |
| InputStream inputStream = fInputStream; |
| |
| if (inputStream == null) { |
| inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength); |
| } |
| |
| try { |
| inputStream.reset(); |
| return new InputStreamReader(inputStream, getName()); |
| } catch (IOException e) { |
| return null; |
| } |
| } |
| |
| /** |
| * Create a Java String from Unicode character data corresponding |
| * to the original byte data supplied to the Charset detect operation. |
| * |
| * @return a String created from the converted input data. |
| */ |
| public String getString() throws java.io.IOException { |
| return getString(-1); |
| |
| } |
| |
| /** |
| * Create a Java String from Unicode character data corresponding |
| * to the original byte data supplied to the Charset detect operation. |
| * The length of the returned string is limited to the specified size; |
| * the string will be trunctated to this length if necessary. A limit value of |
| * zero or less is ignored, and treated as no limit. |
| * |
| * @param maxLength The maximum length of the String to be created when the |
| * source of the data is an input stream, or -1 for |
| * unlimited length. |
| * @return a String created from the converted input data. |
| */ |
| public String getString(int maxLength) throws java.io.IOException { |
| String result = null; |
| if (fInputStream != null) { |
| StringBuilder sb = new StringBuilder(); |
| char[] buffer = new char[1024]; |
| Reader reader = getReader(); |
| int max = maxLength < 0? Integer.MAX_VALUE : maxLength; |
| int bytesRead = 0; |
| |
| while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) { |
| sb.append(buffer, 0, bytesRead); |
| max -= bytesRead; |
| } |
| |
| reader.close(); |
| |
| return sb.toString(); |
| } else { |
| String name = getName(); |
| /* |
| * getName() may return a name with a suffix 'rtl' or 'ltr'. This cannot |
| * be used to open a charset (e.g. IBM424_rtl). The ending '_rtl' or 'ltr' |
| * should be stripped off before creating the string. |
| */ |
| int startSuffix = name.indexOf("_rtl") < 0 ? name.indexOf("_ltr") : name.indexOf("_rtl"); |
| if (startSuffix > 0) { |
| name = name.substring(0, startSuffix); |
| } |
| result = new String(fRawInput, name); |
| } |
| return result; |
| |
| } |
| |
| /** |
| * Get an indication of the confidence in the charset detected. |
| * Confidence values range from 0-100, with larger numbers indicating |
| * a better match of the input data to the characteristics of the |
| * charset. |
| * |
| * @return the confidence in the charset match |
| */ |
| public int getConfidence() { |
| return fConfidence; |
| } |
| |
| /** |
| * Get the name of the detected charset. |
| * The name will be one that can be used with other APIs on the |
| * platform that accept charset names. It is the "Canonical name" |
| * as defined by the class java.nio.charset.Charset; for |
| * charsets that are registered with the IANA charset registry, |
| * this is the MIME-preferred registerd name. |
| * |
| * @see java.nio.charset.Charset |
| * @see java.io.InputStreamReader |
| * |
| * @return The name of the charset. |
| */ |
| public String getName() { |
| return fCharsetName; |
| } |
| |
| /** |
| * Get the ISO code for the language of the detected charset. |
| * |
| * @return The ISO code for the language or <code>null</code> if the language cannot be determined. |
| */ |
| public String getLanguage() { |
| return fLang; |
| } |
| |
| /** |
| * Compare to other CharsetMatch objects. |
| * Comparison is based on the match confidence value, which |
| * allows CharsetDetector.detectAll() to order its results. |
| * |
| * @param other the CharsetMatch object to compare against. |
| * @return a negative integer, zero, or a positive integer as the |
| * confidence level of this CharsetMatch |
| * is less than, equal to, or greater than that of |
| * the argument. |
| * @throws ClassCastException if the argument is not a CharsetMatch. |
| */ |
| @Override |
| public int compareTo (CharsetMatch other) { |
| int compareResult = 0; |
| if (this.fConfidence > other.fConfidence) { |
| compareResult = 1; |
| } else if (this.fConfidence < other.fConfidence) { |
| compareResult = -1; |
| } |
| return compareResult; |
| } |
| |
| /* |
| * Constructor. Implementation internal |
| */ |
| CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) { |
| fConfidence = conf; |
| |
| // The references to the original application input data must be copied out |
| // of the charset recognizer to here, in case the application resets the |
| // recognizer before using this CharsetMatch. |
| if (det.fInputStream == null) { |
| // We only want the existing input byte data if it came straight from the user, |
| // not if is just the head of a stream. |
| fRawInput = det.fRawInput; |
| fRawLength = det.fRawLength; |
| } |
| fInputStream = det.fInputStream; |
| fCharsetName = rec.getName(); |
| fLang = rec.getLanguage(); |
| } |
| |
| /* |
| * Constructor. Implementation internal |
| */ |
| CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) { |
| fConfidence = conf; |
| |
| // The references to the original application input data must be copied out |
| // of the charset recognizer to here, in case the application resets the |
| // recognizer before using this CharsetMatch. |
| if (det.fInputStream == null) { |
| // We only want the existing input byte data if it came straight from the user, |
| // not if is just the head of a stream. |
| fRawInput = det.fRawInput; |
| fRawLength = det.fRawLength; |
| } |
| fInputStream = det.fInputStream; |
| fCharsetName = csName; |
| fLang = lang; |
| } |
| |
| |
| // |
| // Private Data |
| // |
| private int fConfidence; |
| private byte[] fRawInput = null; // Original, untouched input bytes. |
| // If user gave us a byte array, this is it. |
| private int fRawLength; // Length of data in fRawInput array. |
| |
| private InputStream fInputStream = null; // User's input stream, or null if the user |
| // gave us a byte array. |
| |
| private String fCharsetName; // The name of the charset this CharsetMatch |
| // represents. Filled in by the recognizer. |
| private String fLang; // The language, if one was determined by |
| // the recognizer during the detect operation. |
| } |