summaryrefslogblamecommitdiffstats
path: root/src/include/ipxe/utf8.h
blob: 299c255110ff27ec90da03bffd7360d5e8e349da (plain) (tree)




































































                                                                      
#ifndef _IPXE_UTF8_H
#define _IPXE_UTF8_H

/** @file
 *
 * UTF-8 Unicode encoding
 *
 */

FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );

#include <stdint.h>

/** Longest UTF-8 sequence, in bytes */
#define UTF8_MAX_LEN 4

/** Lowest value that may legally be encoded as a two-byte sequence
 *
 * Equal to 2^7.
 */
#define UTF8_MIN_TWO ( 1 << 7 )

/** Lowest value that may legally be encoded as a three-byte sequence
 *
 * Equal to 2^11.
 */
#define UTF8_MIN_THREE ( 1 << 11 )

/** Lowest value that may legally be encoded as a four-byte sequence
 *
 * Equal to 2^16.
 */
#define UTF8_MIN_FOUR ( 1 << 16 )

/** High bit of UTF-8 bytes
 *
 * Clear in every ASCII byte; set in every byte of a multi-byte
 * sequence.
 */
#define UTF8_HIGH_BIT 0x80

/** Number of data bits in each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Bit mask for data bits in a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Non-data bits in a continuation byte */
#define UTF8_CONTINUATION 0x80

/** Check for a continuation byte
 *
 * @v byte		UTF-8 byte
 * @ret is_continuation	Byte is a continuation byte
 *
 * The argument is narrowed to uint8_t before masking.  Without this,
 * a byte held in a signed type (e.g. a plain char with the high bit
 * set) would be sign-extended by integer promotion, the inverted
 * mask would retain the extended bits, and the comparison could
 * never succeed.
 */
#define UTF8_IS_CONTINUATION( byte )					\
	( ( ( ( uint8_t ) (byte) ) & ~UTF8_CONTINUATION_MASK ) ==	\
	  UTF8_CONTINUATION )

/** Check for an ASCII byte
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ! ( (byte) & UTF8_HIGH_BIT ) )

/** Placeholder character reported when a byte sequence cannot be decoded
 *
 * The value is U+FFFD.
 */
#define UTF8_INVALID 0xFFFD

/** A UTF-8 character accumulator
 *
 * Decoding state passed by pointer to utf8_accumulate() together
 * with each input byte.
 */
struct utf8_accumulator {
	/** Character in progress
	 *
	 * NOTE(review): presumably assembled UTF8_CONTINUATION_BITS
	 * at a time as continuation bytes arrive; confirm against
	 * the utf8_accumulate() implementation.
	 */
	unsigned int character;
	/** Number of remaining continuation bytes */
	unsigned int remaining;
	/** Minimum legal character
	 *
	 * NOTE(review): presumably set to UTF8_MIN_TWO,
	 * UTF8_MIN_THREE or UTF8_MIN_FOUR according to the sequence
	 * length, so that overlong encodings can be rejected;
	 * confirm against the utf8_accumulate() implementation.
	 */
	unsigned int min;
};

/**
 * Accumulate a UTF-8 byte
 *
 * @v utf8		UTF-8 accumulator
 * @v byte		UTF-8 byte
 * @ret character	Result of accumulating this byte
 *
 * NOTE(review): the implementation is not visible from this header.
 * Presumably the return value is the completed Unicode character
 * once a whole sequence has been consumed, UTF8_INVALID if decoding
 * fails, and some "incomplete" indication otherwise; confirm against
 * the definition before relying on any of this.
 */
extern unsigned int utf8_accumulate ( struct utf8_accumulator *utf8,
				      uint8_t byte );

#endif /* _IPXE_UTF8_H */