| /**************************************************************************** |
| ** |
| ** Copyright (C) 2015 The Qt Company Ltd. |
| ** Copyright (C) 2013 Intel Corporation |
| ** Contact: http://www.qt.io/licensing/ |
| ** |
| ** This file is part of the QtCore module of the Qt Toolkit. |
| ** |
| ** $QT_BEGIN_LICENSE:LGPL21$ |
| ** Commercial License Usage |
| ** Licensees holding valid commercial Qt licenses may use this file in |
| ** accordance with the commercial license agreement provided with the |
| ** Software or, alternatively, in accordance with the terms contained in |
| ** a written agreement between you and The Qt Company. For licensing terms |
| ** and conditions see http://www.qt.io/terms-conditions. For further |
| ** information use the contact form at http://www.qt.io/contact-us. |
| ** |
| ** GNU Lesser General Public License Usage |
| ** Alternatively, this file may be used under the terms of the GNU Lesser |
| ** General Public License version 2.1 or version 3 as published by the Free |
| ** Software Foundation and appearing in the file LICENSE.LGPLv21 and |
| ** LICENSE.LGPLv3 included in the packaging of this file. Please review the |
| ** following information to ensure the GNU Lesser General Public License |
| ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and |
| ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
| ** |
| ** As a special exception, The Qt Company gives you certain additional |
| ** rights. These rights are described in The Qt Company LGPL Exception |
| ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
| ** |
| ** $QT_END_LICENSE$ |
| ** |
| ****************************************************************************/ |
| |
| #ifndef QUTFCODEC_P_H |
| #define QUTFCODEC_P_H |
| |
| // |
| // W A R N I N G |
| // ------------- |
| // |
| // This file is not part of the Qt API. It exists purely as an |
| // implementation detail. This header file may change from version to |
| // version without notice, or even be removed. |
| // |
| // We mean it. |
| // |
| |
| #include "QtCore/qtextcodec.h" |
| #include "private/qtextcodec_p.h" |
| |
| QT_BEGIN_NAMESPACE |
| |
| struct QUtf8BaseTraits |
| { |
| static const bool isTrusted = false; |
| static const bool allowNonCharacters = true; |
| static const bool skipAsciiHandling = false; |
| static const int Error = -1; |
| static const int EndOfString = -2; |
| |
| static bool isValidCharacter(uint u) |
| { return int(u) >= 0; } |
| |
| static void appendByte(uchar *&ptr, uchar b) |
| { *ptr++ = b; } |
| |
| static uchar peekByte(const uchar *ptr, int n = 0) |
| { return ptr[n]; } |
| |
| static qptrdiff availableBytes(const uchar *ptr, const uchar *end) |
| { return end - ptr; } |
| |
| static void advanceByte(const uchar *&ptr, int n = 1) |
| { ptr += n; } |
| |
| static void appendUtf16(ushort *&ptr, ushort uc) |
| { *ptr++ = uc; } |
| |
| static void appendUcs4(ushort *&ptr, uint uc) |
| { |
| appendUtf16(ptr, QChar::highSurrogate(uc)); |
| appendUtf16(ptr, QChar::lowSurrogate(uc)); |
| } |
| |
| static ushort peekUtf16(const ushort *ptr, int n = 0) |
| { return ptr[n]; } |
| |
| static qptrdiff availableUtf16(const ushort *ptr, const ushort *end) |
| { return end - ptr; } |
| |
| static void advanceUtf16(const ushort *&ptr, int n = 1) |
| { ptr += n; } |
| |
| // it's possible to output to UCS-4 too |
| static void appendUtf16(uint *&ptr, ushort uc) |
| { *ptr++ = uc; } |
| |
| static void appendUcs4(uint *&ptr, uint uc) |
| { *ptr++ = uc; } |
| }; |
| |
| struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits |
| { |
| static const bool skipAsciiHandling = true; |
| }; |
| |
| namespace QUtf8Functions |
| { |
| /// returns 0 on success; errors can only happen if \a u is a surrogate: |
| /// Error if \a u is a low surrogate; |
| /// if \a u is a high surrogate, Error if the next isn't a low one, |
| /// EndOfString if we run into the end of the string. |
| template <typename Traits, typename OutputPtr, typename InputPtr> inline |
| int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end) |
| { |
| if (!Traits::skipAsciiHandling && u < 0x80) { |
| // U+0000 to U+007F (US-ASCII) - one byte |
| Traits::appendByte(dst, uchar(u)); |
| return 0; |
| } else if (u < 0x0800) { |
| // U+0080 to U+07FF - two bytes |
| // first of two bytes |
| Traits::appendByte(dst, 0xc0 | uchar(u >> 6)); |
| } else { |
| if (!QChar::isSurrogate(u)) { |
| // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes |
| if (!Traits::allowNonCharacters && QChar::isNonCharacter(u)) |
| return Traits::Error; |
| |
| // first of three bytes |
| Traits::appendByte(dst, 0xe0 | uchar(u >> 12)); |
| } else { |
| // U+10000 to U+10FFFF - four bytes |
| // need to get one extra codepoint |
| if (Traits::availableUtf16(src, end) == 0) |
| return Traits::EndOfString; |
| |
| ushort low = Traits::peekUtf16(src); |
| if (!QChar::isHighSurrogate(u)) |
| return Traits::Error; |
| if (!QChar::isLowSurrogate(low)) |
| return Traits::Error; |
| |
| Traits::advanceUtf16(src); |
| uint ucs4 = QChar::surrogateToUcs4(u, low); |
| |
| if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4)) |
| return Traits::Error; |
| |
| // first byte |
| Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf)); |
| |
| // second of four bytes |
| Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f)); |
| |
| // for the rest of the bytes |
| u = ushort(ucs4); |
| } |
| |
| // second to last byte |
| Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f)); |
| } |
| |
| // last byte |
| Traits::appendByte(dst, 0x80 | (u & 0x3f)); |
| return 0; |
| } |
| |
| inline bool isContinuationByte(uchar b) |
| { |
| return (b & 0xc0) == 0x80; |
| } |
| |
| /// returns the number of characters consumed (including \a b) in case of success; |
| /// returns negative in case of error: Traits::Error or Traits::EndOfString |
| template <typename Traits, typename OutputPtr, typename InputPtr> inline |
| int fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end) |
| { |
| int charsNeeded; |
| uint min_uc; |
| uint uc; |
| |
| if (!Traits::skipAsciiHandling && b < 0x80) { |
| // US-ASCII |
| Traits::appendUtf16(dst, b); |
| return 1; |
| } |
| |
| if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) { |
| // an UTF-8 first character must be at least 0xC0 |
| // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences |
| return Traits::Error; |
| } else if (b < 0xe0) { |
| charsNeeded = 2; |
| min_uc = 0x80; |
| uc = b & 0x1f; |
| } else if (b < 0xf0) { |
| charsNeeded = 3; |
| min_uc = 0x800; |
| uc = b & 0x0f; |
| } else if (b < 0xf5) { |
| charsNeeded = 4; |
| min_uc = 0x10000; |
| uc = b & 0x07; |
| } else { |
| // the last Unicode character is U+10FFFF |
| // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF" |
| // therefore, a byte higher than 0xF4 is not the UTF-8 first byte |
| return Traits::Error; |
| } |
| |
| int bytesAvailable = Traits::availableBytes(src, end); |
| if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) { |
| // it's possible that we have an error instead of just unfinished bytes |
| if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0))) |
| return Traits::Error; |
| if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1))) |
| return Traits::Error; |
| if (bytesAvailable > 2 && !isContinuationByte(Traits::peekByte(src, 2))) |
| return Traits::Error; |
| return Traits::EndOfString; |
| } |
| |
| // first continuation character |
| b = Traits::peekByte(src, 0); |
| if (!isContinuationByte(b)) |
| return Traits::Error; |
| uc <<= 6; |
| uc |= b & 0x3f; |
| |
| if (charsNeeded > 2) { |
| // second continuation character |
| b = Traits::peekByte(src, 1); |
| if (!isContinuationByte(b)) |
| return Traits::Error; |
| uc <<= 6; |
| uc |= b & 0x3f; |
| |
| if (charsNeeded > 3) { |
| // third continuation character |
| b = Traits::peekByte(src, 2); |
| if (!isContinuationByte(b)) |
| return Traits::Error; |
| uc <<= 6; |
| uc |= b & 0x3f; |
| } |
| } |
| |
| // we've decoded something; safety-check it |
| if (!Traits::isTrusted) { |
| if (uc < min_uc) |
| return Traits::Error; |
| if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) |
| return Traits::Error; |
| if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc)) |
| return Traits::Error; |
| } |
| |
| // write the UTF-16 sequence |
| if (!QChar::requiresSurrogates(uc)) { |
| // UTF-8 decoded and no surrogates are required |
| // detach if necessary |
| Traits::appendUtf16(dst, ushort(uc)); |
| } else { |
| // UTF-8 decoded to something that requires a surrogate pair |
| Traits::appendUcs4(dst, uc); |
| } |
| |
| Traits::advanceByte(src, charsNeeded - 1); |
| return charsNeeded; |
| } |
| } |
| |
/// Byte-order selector accepted as the optional last argument of the QUtf16
/// and QUtf32 conversion functions, where DetectEndianness is the default.
// NOTE(review): how DetectEndianness is resolved is decided by the
// implementations, which are not part of this header.
enum DataEndianness
{
    DetectEndianness = 0,
    BigEndianness = 1,
    LittleEndianness = 2
};
| |
| struct QUtf8 |
| { |
| static QString convertToUnicode(const char *, int); |
| static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *); |
| static QByteArray convertFromUnicode(const QChar *, int); |
| static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *); |
| }; |
| |
| struct QUtf16 |
| { |
| static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); |
| static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); |
| }; |
| |
| struct QUtf32 |
| { |
| static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); |
| static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); |
| }; |
| |
| #ifndef QT_NO_TEXTCODEC |
| |
| class QUtf8Codec : public QTextCodec { |
| public: |
| ~QUtf8Codec(); |
| |
| QByteArray name() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| |
| QString convertToUnicode(const char *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| void convertToUnicode(QString *target, const char *, int, ConverterState *) const; |
| }; |
| |
| class QUtf16Codec : public QTextCodec { |
| protected: |
| public: |
| QUtf16Codec() { e = DetectEndianness; } |
| ~QUtf16Codec(); |
| |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| |
| QString convertToUnicode(const char *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| |
| protected: |
| DataEndianness e; |
| }; |
| |
| class QUtf16BECodec : public QUtf16Codec { |
| public: |
| QUtf16BECodec() : QUtf16Codec() { e = BigEndianness; } |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| }; |
| |
| class QUtf16LECodec : public QUtf16Codec { |
| public: |
| QUtf16LECodec() : QUtf16Codec() { e = LittleEndianness; } |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| }; |
| |
| class QUtf32Codec : public QTextCodec { |
| public: |
| QUtf32Codec() { e = DetectEndianness; } |
| ~QUtf32Codec(); |
| |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| |
| QString convertToUnicode(const char *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const Q_DECL_OVERRIDE; |
| |
| protected: |
| DataEndianness e; |
| }; |
| |
| class QUtf32BECodec : public QUtf32Codec { |
| public: |
| QUtf32BECodec() : QUtf32Codec() { e = BigEndianness; } |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| }; |
| |
| class QUtf32LECodec : public QUtf32Codec { |
| public: |
| QUtf32LECodec() : QUtf32Codec() { e = LittleEndianness; } |
| QByteArray name() const Q_DECL_OVERRIDE; |
| QList<QByteArray> aliases() const Q_DECL_OVERRIDE; |
| int mibEnum() const Q_DECL_OVERRIDE; |
| }; |
| |
| |
| #endif // QT_NO_TEXTCODEC |
| |
| QT_END_NAMESPACE |
| |
| #endif // QUTFCODEC_P_H |