| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2021 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| // emojiprops.h |
| // created: 2021sep06 Markus W. Scherer |
| |
| package android.icu.impl; |
| |
| import java.io.IOException; |
| import java.nio.ByteBuffer; |
| |
| import android.icu.lang.UProperty; |
| import android.icu.text.UnicodeSet; |
| import android.icu.util.BytesTrie; |
| import android.icu.util.CharsTrie; |
| import android.icu.util.CodePointMap; |
| import android.icu.util.CodePointTrie; |
| import android.icu.util.ICUUncheckedIOException; |
| |
| /** |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public final class EmojiProps { |
| private static final class IsAcceptable implements ICUBinary.Authenticate { |
| @Override |
| public boolean isDataVersionAcceptable(byte version[]) { |
| return version[0] == 1; |
| } |
| } |
| private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); |
| private static final int DATA_FORMAT = 0x456d6f6a; // "Emoj" |
| |
| // Byte offsets from the start of the data, after the generic header, |
| // in ascending order. |
| // UCPTrie=CodePointTrie, follows the indexes |
| private static final int IX_CPTRIE_OFFSET = 0; |
| |
| // UCharsTrie=CharsTrie |
| private static final int IX_BASIC_EMOJI_TRIE_OFFSET = 4; |
| //ivate static final int IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET = 5; |
| //ivate static final int IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET = 6; |
| //ivate static final int IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET = 7; |
| //ivate static final int IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET = 8; |
| private static final int IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET = 9; |
| |
| // Properties in the code point trie. |
| // https://www.unicode.org/reports/tr51/#Emoji_Properties |
| private static final int BIT_EMOJI = 0; |
| private static final int BIT_EMOJI_PRESENTATION = 1; |
| private static final int BIT_EMOJI_MODIFIER = 2; |
| private static final int BIT_EMOJI_MODIFIER_BASE = 3; |
| private static final int BIT_EMOJI_COMPONENT = 4; |
| private static final int BIT_EXTENDED_PICTOGRAPHIC = 5; |
| // https://www.unicode.org/reports/tr51/#Emoji_Sets |
| private static final int BIT_BASIC_EMOJI = 6; |
| |
| public static final EmojiProps INSTANCE = new EmojiProps(); |
| |
| private CodePointTrie.Fast8 cpTrie = null; |
| private String stringTries[] = new String[6]; |
| |
| /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */ |
| private static int getStringTrieIndex(int i) { |
| return i - IX_BASIC_EMOJI_TRIE_OFFSET; |
| } |
| |
| private EmojiProps() { |
| ByteBuffer bytes = ICUBinary.getRequiredData("uemoji.icu"); |
| try { |
| ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE); |
| int startPos = bytes.position(); |
| |
| int cpTrieOffset = bytes.getInt(); // inIndexes[IX_CPTRIE_OFFSET] |
| int indexesLength = cpTrieOffset / 4; |
| if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) { |
| throw new ICUUncheckedIOException( |
| "Emoji properties data: not enough indexes"); |
| } |
| |
| int[] inIndexes = new int[indexesLength]; |
| inIndexes[0] = cpTrieOffset; |
| for (int i = 1; i < indexesLength; ++i) { |
| inIndexes[i] = bytes.getInt(); |
| } |
| |
| int i = IX_CPTRIE_OFFSET; |
| int offset = inIndexes[i++]; |
| int nextOffset = inIndexes[i]; |
| cpTrie = CodePointTrie.Fast8.fromBinary(bytes); |
| int pos = bytes.position() - startPos; |
| assert nextOffset >= pos; |
| ICUBinary.skipBytes(bytes, nextOffset - pos); // skip padding after trie bytes |
| |
| offset = nextOffset; |
| nextOffset = inIndexes[IX_BASIC_EMOJI_TRIE_OFFSET]; |
| ICUBinary.skipBytes(bytes, nextOffset - offset); // skip unknown bytes |
| |
| for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) { |
| offset = inIndexes[i]; |
| nextOffset = inIndexes[i + 1]; |
| // Set/leave null if there is no CharsTrie. |
| if (nextOffset > offset) { |
| stringTries[getStringTrieIndex(i)] = |
| ICUBinary.getString(bytes, (nextOffset - offset) / 2, 0); |
| } |
| } |
| } catch(IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| public UnicodeSet addPropertyStarts(UnicodeSet set) { |
| // Add the start code point of each same-value range of the trie. |
| CodePointMap.Range range = new CodePointMap.Range(); |
| int start = 0; |
| while (cpTrie.getRange(start, null, range)) { |
| set.add(start); |
| start = range.getEnd() + 1; |
| } |
| return set; |
| } |
| |
| // Note: REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere. |
| private static final byte[] bitFlags = { |
| BIT_EMOJI, // UCHAR_EMOJI=57 |
| BIT_EMOJI_PRESENTATION, // UCHAR_EMOJI_PRESENTATION=58 |
| BIT_EMOJI_MODIFIER, // UCHAR_EMOJI_MODIFIER=59 |
| BIT_EMOJI_MODIFIER_BASE, // UCHAR_EMOJI_MODIFIER_BASE=60 |
| BIT_EMOJI_COMPONENT, // UCHAR_EMOJI_COMPONENT=61 |
| -1, // UCHAR_REGIONAL_INDICATOR=62 |
| -1, // UCHAR_PREPENDED_CONCATENATION_MARK=63 |
| BIT_EXTENDED_PICTOGRAPHIC, // UCHAR_EXTENDED_PICTOGRAPHIC=64 |
| BIT_BASIC_EMOJI, // UCHAR_BASIC_EMOJI=65 |
| -1, // UCHAR_EMOJI_KEYCAP_SEQUENCE=66 |
| -1, // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67 |
| -1, // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68 |
| -1, // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69 |
| -1, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70 |
| BIT_BASIC_EMOJI, // UCHAR_RGI_EMOJI=71 |
| }; |
| |
| public boolean hasBinaryProperty(int c, int which) { |
| if (which < UProperty.EMOJI || UProperty.RGI_EMOJI < which) { |
| return false; |
| } |
| int bit = bitFlags[which - UProperty.EMOJI]; |
| if (bit < 0) { |
| return false; // not a property that we support in this function |
| } |
| int bits = cpTrie.get(c); |
| return ((bits >> bit) & 1) != 0; |
| } |
| |
| public boolean hasBinaryProperty(CharSequence s, int which) { |
| int length = s.length(); |
| if (length == 0) { return false; } // empty string |
| // The caller should have delegated single code points to hasBinaryProperty(c, which). |
| if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) { |
| return false; |
| } |
| int firstProp = which, lastProp = which; |
| if (which == UProperty.RGI_EMOJI) { |
| // RGI_Emoji is the union of the other emoji properties of strings. |
| firstProp = UProperty.BASIC_EMOJI; |
| lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE; |
| } |
| for (int prop = firstProp; prop <= lastProp; ++prop) { |
| String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI]; |
| if (trieUChars != null) { |
| CharsTrie trie = new CharsTrie(trieUChars, 0); |
| BytesTrie.Result result = trie.next(s, 0, length); |
| if (result.hasValue()) { |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| public void addStrings(int which, UnicodeSet set) { |
| if (which < UProperty.BASIC_EMOJI || UProperty.RGI_EMOJI < which) { |
| return; |
| } |
| int firstProp = which, lastProp = which; |
| if (which == UProperty.RGI_EMOJI) { |
| // RGI_Emoji is the union of the other emoji properties of strings. |
| firstProp = UProperty.BASIC_EMOJI; |
| lastProp = UProperty.RGI_EMOJI_ZWJ_SEQUENCE; |
| } |
| for (int prop = firstProp; prop <= lastProp; ++prop) { |
| String trieUChars = stringTries[prop - UProperty.BASIC_EMOJI]; |
| if (trieUChars != null) { |
| CharsTrie trie = new CharsTrie(trieUChars, 0); |
| for (CharsTrie.Entry entry : trie) { |
| set.add(entry.chars); |
| } |
| } |
| } |
| } |
| } |