Files
Fig/src/Deps/String/CharUtils.hpp
2026-02-14 15:32:11 +08:00

130 lines
4.7 KiB
C++

/*!
@file src/Deps/String/CharUtils.hpp
@brief Character classification helpers for char32_t (Unicode code point) values
@author PuqiAR (im@puqiar.top)
@date 2026-02-13
*/
#pragma once
namespace Fig::Deps
{
/// @brief Stateless classification predicates for Unicode code points stored in char32_t.
///
/// Every member is a static, constexpr, noexcept pure function of its argument.
/// The `isAscii*` predicates are exact for the ASCII range. The Unicode-wide
/// predicates (`isDigit`, `isAlpha`, `isPunct`, `isSymbol`) are hand-written range
/// approximations, not Unicode Character Database lookups: they cover the
/// scripts/blocks listed on each function and return false for everything else.
class CharUtils
{
public:
    using U32 = char32_t;

    // ===== Basics =====

    /// @return true if @p c is a Unicode scalar value: at most U+10FFFF and not a
    ///         surrogate (U+D800..U+DFFF).
    static constexpr bool isValidScalar(U32 c) noexcept { return c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF); }

    /// @return true if @p c is in the 7-bit ASCII range (U+0000..U+007F).
    static constexpr bool isAscii(U32 c) noexcept { return c <= 0x7F; }

    /// @return true if @p c is a control character: C0 (U+0000..U+001F), DEL (U+007F)
    ///         or C1 (U+0080..U+009F), i.e. Unicode General_Category Cc.
    static constexpr bool isControl(U32 c) noexcept { return c <= 0x1F || (c >= 0x7F && c <= 0x9F); }

    /// @return true if @p c is not a control character.
    /// @note Only controls are excluded; surrogates and values above U+10FFFF are
    ///       not rejected here. Combine with isValidScalar() when that matters.
    static constexpr bool isPrintable(U32 c) noexcept { return !isControl(c); }

    // ===== ASCII classification =====

    /// @return true if @p c is in 'a'..'z'.
    static constexpr bool isAsciiLower(U32 c) noexcept { return c >= U'a' && c <= U'z'; }

    /// @return true if @p c is in 'A'..'Z'.
    static constexpr bool isAsciiUpper(U32 c) noexcept { return c >= U'A' && c <= U'Z'; }

    /// @return true if @p c is an ASCII letter.
    static constexpr bool isAsciiAlpha(U32 c) noexcept { return isAsciiLower(c) || isAsciiUpper(c); }

    /// @return true if @p c is in '0'..'9'.
    static constexpr bool isAsciiDigit(U32 c) noexcept { return c >= U'0' && c <= U'9'; }

    /// @return true if @p c is in '0'..'9', 'a'..'f' or 'A'..'F'.
    static constexpr bool isAsciiHexDigit(U32 c) noexcept
    {
        return isAsciiDigit(c) || (c >= U'a' && c <= U'f') || (c >= U'A' && c <= U'F');
    }

    /// @return true if @p c is the space character or one of U+0009..U+000D
    ///         (tab, line feed, vertical tab, form feed, carriage return).
    static constexpr bool isAsciiSpace(U32 c) noexcept { return c == U' ' || (c >= 0x09 && c <= 0x0D); }

    /// @return true if @p c is one of the four ASCII punctuation runs:
    ///         '!'..'/', ':'..'@', '['..'`', '{'..'~'.
    static constexpr bool isAsciiPunct(U32 c) noexcept
    {
        return (c >= U'!' && c <= U'/') || (c >= U':' && c <= U'@') || (c >= U'[' && c <= U'`')
               || (c >= U'{' && c <= U'~');
    }

    // ===== Unicode White_Space =====

    /// @return true if @p c is whitespace: the ASCII whitespace set, or one of the
    ///         non-ASCII code points listed below.
    static constexpr bool isSpace(U32 c) noexcept
    {
        if (isAscii(c)) return isAsciiSpace(c);
        return c == 0x0085                       // NEXT LINE
               || c == 0x00A0                    // NO-BREAK SPACE
               || c == 0x1680                    // OGHAM SPACE MARK
               || (c >= 0x2000 && c <= 0x200A)   // EN QUAD .. HAIR SPACE
               || c == 0x2028                    // LINE SEPARATOR
               || c == 0x2029                    // PARAGRAPH SEPARATOR
               || c == 0x202F                    // NARROW NO-BREAK SPACE
               || c == 0x205F                    // MEDIUM MATHEMATICAL SPACE
               || c == 0x3000;                   // IDEOGRAPHIC SPACE
    }

    // ===== Unicode Decimal_Number =====

    /// @return true if @p c is an ASCII digit or a decimal digit of one of the
    ///         scripts listed below (a subset of the Unicode Nd category).
    static constexpr bool isDigit(U32 c) noexcept
    {
        if (isAscii(c)) return isAsciiDigit(c);
        return (c >= 0x0660 && c <= 0x0669)      // Arabic-Indic
               || (c >= 0x06F0 && c <= 0x06F9)   // Extended Arabic-Indic
               || (c >= 0x0966 && c <= 0x096F)   // Devanagari
               || (c >= 0x09E6 && c <= 0x09EF)   // Bengali
               || (c >= 0x0A66 && c <= 0x0A6F)   // Gurmukhi
               || (c >= 0x0AE6 && c <= 0x0AEF)   // Gujarati
               || (c >= 0x0B66 && c <= 0x0B6F)   // Oriya
               || (c >= 0x0BE6 && c <= 0x0BEF)   // Tamil
               || (c >= 0x0C66 && c <= 0x0C6F)   // Telugu
               || (c >= 0x0CE6 && c <= 0x0CEF)   // Kannada
               || (c >= 0x0D66 && c <= 0x0D6F)   // Malayalam
               || (c >= 0x0E50 && c <= 0x0E59)   // Thai
               || (c >= 0x0ED0 && c <= 0x0ED9)   // Lao
               || (c >= 0x0F20 && c <= 0x0F29)   // Tibetan
               || (c >= 0x1040 && c <= 0x1049)   // Myanmar
               || (c >= 0x17E0 && c <= 0x17E9)   // Khmer
               || (c >= 0x1810 && c <= 0x1819)   // Mongolian
               || (c >= 0xFF10 && c <= 0xFF19);  // Fullwidth
    }

    // ===== Unicode Letter =====

    /// @return true if @p c is an ASCII letter, or lies in one of the letter-bearing
    ///         blocks listed below and is not a digit or a math sign.
    /// @note Block-level approximation: other non-letters inside these blocks
    ///       (script-specific punctuation, combining marks) are still accepted.
    static constexpr bool isAlpha(U32 c) noexcept
    {
        if (isAscii(c)) return isAsciiAlpha(c);

        // The Arabic and Devanagari blocks below also contain decimal digits, and
        // the Latin-1 range contains the multiplication (U+00D7) and division
        // (U+00F7) signs. Reject those first so a digit is never reported as a
        // letter (and therefore never accepted by isIdentifierStart).
        if (isDigit(c) || c == 0x00D7 || c == 0x00F7) return false;

        return (c >= 0x00C0 && c <= 0x02AF)      // Latin-1 letters .. IPA Extensions
               || (c >= 0x0370 && c <= 0x052F)   // Greek, Cyrillic, Cyrillic Supplement
               || (c >= 0x0530 && c <= 0x058F)   // Armenian
               || (c >= 0x0590 && c <= 0x05FF)   // Hebrew
               || (c >= 0x0600 && c <= 0x06FF)   // Arabic
               || (c >= 0x0900 && c <= 0x097F)   // Devanagari
               || (c >= 0x3040 && c <= 0x30FF)   // Hiragana, Katakana
               || (c >= 0x3100 && c <= 0x312F)   // Bopomofo
               || (c >= 0x4E00 && c <= 0x9FFF)   // CJK Unified Ideographs
               || (c >= 0xAC00 && c <= 0xD7AF);  // Hangul Syllables
    }

    // ===== Punctuation / symbols / separators (engineering approximation) =====

    /// @return true if @p c is ASCII punctuation, or a non-whitespace code point of
    ///         the General Punctuation block (U+2000..U+206F).
    static constexpr bool isPunct(U32 c) noexcept
    {
        if (isAscii(c)) return isAsciiPunct(c);
        // General Punctuation also holds the typographic spaces and the line and
        // paragraph separators; those are whitespace, not punctuation.
        return (c >= 0x2000 && c <= 0x206F) && !isSpace(c);
    }

    /// @return true if @p c lies in one of the symbol blocks listed below.
    /// @note No ASCII character is reported as a symbol; ASCII characters such as
    ///       '$' or '+' are covered by isAsciiPunct() instead.
    static constexpr bool isSymbol(U32 c) noexcept
    {
        return (c >= 0x20A0 && c <= 0x20CF) ||  // currency
               (c >= 0x2100 && c <= 0x214F) ||  // letterlike
               (c >= 0x2190 && c <= 0x21FF) ||  // arrows
               (c >= 0x2600 && c <= 0x26FF) ||  // misc symbols
               (c >= 0x1F300 && c <= 0x1FAFF);  // emoji blocks
    }

    // ===== Combinations =====

    /// @return true if @p c is a letter (isAlpha) or a decimal digit (isDigit).
    static constexpr bool isAlnum(U32 c) noexcept { return isAlpha(c) || isDigit(c); }

    /// @return true if @p c is an ASCII hexadecimal digit; non-ASCII digits are never accepted.
    static constexpr bool isHexDigit(U32 c) noexcept { return isAsciiHexDigit(c); }

    /// @return true if @p c may begin an identifier: a letter or '_'.
    static constexpr bool isIdentifierStart(U32 c) noexcept { return isAlpha(c) || c == U'_'; }

    /// @return true if @p c may continue an identifier: a letter, a decimal digit or '_'.
    static constexpr bool isIdentifierContinue(U32 c) noexcept { return isAlnum(c) || c == U'_'; }
};
};