cras/src/server/cras_utf8.c - platform/external/adhd - Git at Google

 /* Copyright 2016 The Chromium OS Authors. All rights reserved.
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include <stdlib.h>
 #include <stdint.h>
 #include <sys/types.h>

 #ifdef CRAS_DBUS
 #include <dbus/dbus.h>
 #endif

 #include "cras_utf8.h"
 #include "cras_util.h"

 /* Inclusive range of values permitted for one byte of a UTF-8 sequence. */
 typedef struct u8range {
 	uint8_t min;
 	uint8_t max;
 } u8range_t;

 /*
  * Tables of well-formed multi-byte UTF-8 sequences.
  *
  * In every table the first entry is the range of the lead byte, the
  * following entries are the ranges of the continuation bytes in order,
  * and a { 0, 0 } entry terminates the table.  The restricted second-byte
  * ranges are what reject overlong encodings, UTF-16 surrogates and code
  * points above U+10FFFF.
  */

 /* Two bytes: U+0080..U+07FF.  Lead bytes 0xc0/0xc1 are not listed. */
 static const u8range_t kUTF8TwoByteSeq[] = {
 	{ 0xc2, 0xdf },
 	{ 0x80, 0xbf },
 	{ 0, 0 },
 };

 /* Three bytes, lead 0xe0: second byte starts at 0xa0 (no overlongs). */
 static const u8range_t kUTF8ByteSeqE0[] = {
 	{ 0xe0, 0xe0 },
 	{ 0xa0, 0xbf },
 	{ 0x80, 0xbf },
 	{ 0, 0 },
 };

 /* Three bytes, lead 0xe1..0xec. */
 static const u8range_t kUTF8ByteSeqE1EC[] = {
 	{ 0xe1, 0xec },
 	{ 0x80, 0xbf },
 	{ 0x80, 0xbf },
 	{ 0, 0 },
 };

 /* Three bytes, lead 0xed: second byte stops at 0x9f (no surrogates). */
 static const u8range_t kUTF8ByteSeqED[] = {
 	{ 0xed, 0xed },
 	{ 0x80, 0x9f },
 	{ 0x80, 0xbf },
 	{ 0, 0 },
 };

 /* Three bytes, lead 0xee..0xef.  This also covers the byte-order mark
  * EF BB BF, which is simply the encoding of U+FEFF. */
 static const u8range_t kUTF8ByteSeqEEEF[] = {
 	{ 0xee, 0xef },
 	{ 0x80, 0xbf },
 	{ 0x80, 0xbf },
 	{ 0, 0 },
 };

 /* Four bytes, lead 0xf0: second byte starts at 0x90 (no overlongs). */
 static const u8range_t kUTF8ByteSeqF0[] = {
 	{ 0xf0, 0xf0 }, { 0x90, 0xbf }, { 0x80, 0xbf },
 	{ 0x80, 0xbf }, { 0, 0 },
 };

 /* Four bytes, lead 0xf1..0xf3. */
 static const u8range_t kUTF8ByteSeqF1F3[] = {
 	{ 0xf1, 0xf3 }, { 0x80, 0xbf }, { 0x80, 0xbf },
 	{ 0x80, 0xbf }, { 0, 0 },
 };

 /* Four bytes, lead 0xf4: second byte stops at 0x8f (max U+10FFFF). */
 static const u8range_t kUTF8ByteSeqF4[] = {
 	{ 0xf4, 0xf4 }, { 0x80, 0x8f }, { 0x80, 0xbf },
 	{ 0x80, 0xbf }, { 0, 0 },
 };

 /* Sentinel table: its lead range is { 0, 0 }, which ends the search. */
 static const u8range_t kUTF8NullRange[] = { { 0, 0 } };

 typedef struct utf8seq {
 	const u8range_t *ranges;
 } utf8seq_t;

 /* All accepted multi-byte sequences, terminated by kUTF8NullRange. */
 static const utf8seq_t kUTF8Sequences[] = {
 	{ kUTF8TwoByteSeq },  { kUTF8ByteSeqE0 },   { kUTF8ByteSeqE1EC },
 	{ kUTF8ByteSeqED },   { kUTF8ByteSeqEEEF }, { kUTF8ByteSeqF0 },
 	{ kUTF8ByteSeqF1F3 }, { kUTF8ByteSeqF4 },   { kUTF8NullRange }
 };

 /*
  * Checks whether a NUL-terminated string is well-formed UTF-8.
  *
  * Args:
  *    string  - The string to check.  May be NULL.
  *    bad_pos - Optional output.  On failure receives the index of the
  *              first offending byte; when the string ends in the middle
  *              of a sequence this is the index of the terminating NUL.
  *              On success receives the length of the string.  When
  *              string is NULL it receives (size_t)-1.
  * Returns:
  *    1 if the string is valid UTF-8, 0 otherwise (including NULL).
  *
  * No special byte-order-mark handling is needed: EF BB BF is an ordinary
  * three-byte sequence accepted through kUTF8ByteSeqEEEF.  Handling it
  * separately used to let a truncated or interleaved BOM prefix (for
  * example "\xef\xbb") pass as valid.
  */
 int valid_utf8_string(const char *string, size_t *bad_pos)
 {
 	uint8_t byte;
 	const char *pos = string;
 	int ret = 1;
 	const utf8seq_t *seq = NULL;
 	/* Range expected for the next continuation byte, if any. */
 	const u8range_t *range = NULL;

 	if (!string) {
 		/* Report the historical value without doing pointer
 		 * arithmetic on NULL. */
 		if (bad_pos)
 			*bad_pos = (size_t)-1;
 		return 0;
 	}

 	while ((byte = (uint8_t)*pos++) != 0) {
 		if (range && range->min != 0) {
 			/* In the middle of a multi-byte sequence. */
 			if (byte < range->min || byte > range->max) {
 				ret = 0;
 				goto out;
 			}
 			range++;
 			continue;
 		}

 		if (byte < 0x80) {
 			/* Ascii character. */
 			continue;
 		}

 		/* Select the sequence table whose lead-byte range
 		 * contains this byte. */
 		for (seq = kUTF8Sequences; seq->ranges->min != 0; seq++) {
 			if (byte >= seq->ranges->min &&
 			    byte <= seq->ranges->max)
 				break;
 		}

 		if (seq->ranges->min == 0) {
 			/* Not a valid lead byte. */
 			ret = 0;
 			goto out;
 		}

 		range = seq->ranges + 1;
 	}

 	if (range && range->min != 0) {
 		/* Stopped in the middle of a sequence. */
 		ret = 0;
 	}

 out:
 	/* pos was advanced past the byte just read, hence the -1. */
 	if (bad_pos)
 		*bad_pos = (size_t)(pos - string - 1);
 	return ret;
 }

 #ifdef CRAS_DBUS
 /* Use the DBus implementation if available to ensure that the UTF-8
  * sequences match those expected by the DBus implementation.
  *
  * Args:
  *    string - NUL-terminated string to check.  May be NULL.
  * Returns:
  *    1 if libdbus accepts the string as UTF-8, 0 otherwise.  A NULL
  *    string yields 0, the same result the non-DBus build produces.
  */
 int is_utf8_string(const char *string)
 {
 	/* NULL is not a valid argument for dbus_validate_utf8(); answer
 	 * here instead of relying on libdbus' argument checks. */
 	if (!string)
 		return 0;

 	return !!dbus_validate_utf8(string, NULL);
 }

 #else

 /* Without DBus, defer to the built-in validator.  The position of the
  * first bad byte is not wanted, so no output pointer is supplied. */
 int is_utf8_string(const char *string)
 {
 	const int is_valid = valid_utf8_string(string, NULL);

 	return is_valid;
 }

 #endif
	/* Copyright 2016 The Chromium OS Authors. All rights reserved.
	* Use of this source code is governed by a BSD-style license that can be
	* found in the LICENSE file.
	*/

	#include <stdlib.h>
	#include <stdint.h>
	#include <sys/types.h>

	#ifdef CRAS_DBUS
	#include <dbus/dbus.h>
	#endif

	#include "cras_utf8.h"
	#include "cras_util.h"

	/* Inclusive range of values permitted for one byte of a UTF-8 sequence. */
	typedef struct u8range {
		uint8_t min;
		uint8_t max;
	} u8range_t;

	/*
	 * Tables of well-formed multi-byte UTF-8 sequences.
	 *
	 * In every table the first entry is the range of the lead byte, the
	 * following entries are the ranges of the continuation bytes in order,
	 * and a { 0, 0 } entry terminates the table.  The restricted second-byte
	 * ranges are what reject overlong encodings, UTF-16 surrogates and code
	 * points above U+10FFFF.
	 */

	/* Two bytes: U+0080..U+07FF.  Lead bytes 0xc0/0xc1 are not listed. */
	static const u8range_t kUTF8TwoByteSeq[] = {
		{ 0xc2, 0xdf },
		{ 0x80, 0xbf },
		{ 0, 0 },
	};

	/* Three bytes, lead 0xe0: second byte starts at 0xa0 (no overlongs). */
	static const u8range_t kUTF8ByteSeqE0[] = {
		{ 0xe0, 0xe0 },
		{ 0xa0, 0xbf },
		{ 0x80, 0xbf },
		{ 0, 0 },
	};

	/* Three bytes, lead 0xe1..0xec. */
	static const u8range_t kUTF8ByteSeqE1EC[] = {
		{ 0xe1, 0xec },
		{ 0x80, 0xbf },
		{ 0x80, 0xbf },
		{ 0, 0 },
	};

	/* Three bytes, lead 0xed: second byte stops at 0x9f (no surrogates). */
	static const u8range_t kUTF8ByteSeqED[] = {
		{ 0xed, 0xed },
		{ 0x80, 0x9f },
		{ 0x80, 0xbf },
		{ 0, 0 },
	};

	/* Three bytes, lead 0xee..0xef.  This also covers the byte-order mark
	 * EF BB BF, which is simply the encoding of U+FEFF. */
	static const u8range_t kUTF8ByteSeqEEEF[] = {
		{ 0xee, 0xef },
		{ 0x80, 0xbf },
		{ 0x80, 0xbf },
		{ 0, 0 },
	};

	/* Four bytes, lead 0xf0: second byte starts at 0x90 (no overlongs). */
	static const u8range_t kUTF8ByteSeqF0[] = {
		{ 0xf0, 0xf0 }, { 0x90, 0xbf }, { 0x80, 0xbf },
		{ 0x80, 0xbf }, { 0, 0 },
	};

	/* Four bytes, lead 0xf1..0xf3. */
	static const u8range_t kUTF8ByteSeqF1F3[] = {
		{ 0xf1, 0xf3 }, { 0x80, 0xbf }, { 0x80, 0xbf },
		{ 0x80, 0xbf }, { 0, 0 },
	};

	/* Four bytes, lead 0xf4: second byte stops at 0x8f (max U+10FFFF). */
	static const u8range_t kUTF8ByteSeqF4[] = {
		{ 0xf4, 0xf4 }, { 0x80, 0x8f }, { 0x80, 0xbf },
		{ 0x80, 0xbf }, { 0, 0 },
	};

	/* Sentinel table: its lead range is { 0, 0 }, which ends the search. */
	static const u8range_t kUTF8NullRange[] = { { 0, 0 } };

	typedef struct utf8seq {
		const u8range_t *ranges;
	} utf8seq_t;

	/* All accepted multi-byte sequences, terminated by kUTF8NullRange. */
	static const utf8seq_t kUTF8Sequences[] = {
		{ kUTF8TwoByteSeq },  { kUTF8ByteSeqE0 },   { kUTF8ByteSeqE1EC },
		{ kUTF8ByteSeqED },   { kUTF8ByteSeqEEEF }, { kUTF8ByteSeqF0 },
		{ kUTF8ByteSeqF1F3 }, { kUTF8ByteSeqF4 },   { kUTF8NullRange }
	};

	/*
	 * Checks whether a NUL-terminated string is well-formed UTF-8.
	 *
	 * Args:
	 *    string  - The string to check.  May be NULL.
	 *    bad_pos - Optional output.  On failure receives the index of the
	 *              first offending byte; when the string ends in the middle
	 *              of a sequence this is the index of the terminating NUL.
	 *              On success receives the length of the string.  When
	 *              string is NULL it receives (size_t)-1.
	 * Returns:
	 *    1 if the string is valid UTF-8, 0 otherwise (including NULL).
	 *
	 * No special byte-order-mark handling is needed: EF BB BF is an ordinary
	 * three-byte sequence accepted through kUTF8ByteSeqEEEF.  Handling it
	 * separately used to let a truncated or interleaved BOM prefix (for
	 * example "\xef\xbb") pass as valid.
	 */
	int valid_utf8_string(const char *string, size_t *bad_pos)
	{
		uint8_t byte;
		const char *pos = string;
		int ret = 1;
		const utf8seq_t *seq = NULL;
		/* Range expected for the next continuation byte, if any. */
		const u8range_t *range = NULL;

		if (!string) {
			/* Report the historical value without doing pointer
			 * arithmetic on NULL. */
			if (bad_pos)
				*bad_pos = (size_t)-1;
			return 0;
		}

		while ((byte = (uint8_t)*pos++) != 0) {
			if (range && range->min != 0) {
				/* In the middle of a multi-byte sequence. */
				if (byte < range->min || byte > range->max) {
					ret = 0;
					goto out;
				}
				range++;
				continue;
			}

			if (byte < 0x80) {
				/* Ascii character. */
				continue;
			}

			/* Select the sequence table whose lead-byte range
			 * contains this byte. */
			for (seq = kUTF8Sequences; seq->ranges->min != 0; seq++) {
				if (byte >= seq->ranges->min &&
				    byte <= seq->ranges->max)
					break;
			}

			if (seq->ranges->min == 0) {
				/* Not a valid lead byte. */
				ret = 0;
				goto out;
			}

			range = seq->ranges + 1;
		}

		if (range && range->min != 0) {
			/* Stopped in the middle of a sequence. */
			ret = 0;
		}

	out:
		/* pos was advanced past the byte just read, hence the -1. */
		if (bad_pos)
			*bad_pos = (size_t)(pos - string - 1);
		return ret;
	}

	#ifdef CRAS_DBUS
	/* Use the DBus implementation if available to ensure that the UTF-8
	 * sequences match those expected by the DBus implementation.
	 *
	 * Args:
	 *    string - NUL-terminated string to check.  May be NULL.
	 * Returns:
	 *    1 if libdbus accepts the string as UTF-8, 0 otherwise.  A NULL
	 *    string yields 0, the same result the non-DBus build produces.
	 */
	int is_utf8_string(const char *string)
	{
		/* NULL is not a valid argument for dbus_validate_utf8(); answer
		 * here instead of relying on libdbus' argument checks. */
		if (!string)
			return 0;

		return !!dbus_validate_utf8(string, NULL);
	}

	#else

	/* Without DBus, defer to the built-in validator.  The position of the
	 * first bad byte is not wanted, so no output pointer is supplied. */
	int is_utf8_string(const char *string)
	{
		const int is_valid = valid_utf8_string(string, NULL);

		return is_valid;
	}

	#endif