完成 Error 定义和 ErrorLog,以及一些相关的东西。

This commit is contained in:
2026-02-13 23:11:37 +08:00
parent cfcdfde170
commit 877253cbbc
22 changed files with 2200 additions and 0 deletions

13
src/Deps/Deps.hpp Normal file
View File

@@ -0,0 +1,13 @@
#pragma once
#include <Core/CoreInfos.hpp>
#include <Deps/HashMap/HashMap.hpp>
#include <Deps/String/String.hpp>
// Re-exports the dependency-layer container types into namespace Fig.
// The using-declarations are only compiled when __FCORE_LINK_DEPS is defined.
namespace Fig
{
#ifdef __FCORE_LINK_DEPS
    using Deps::HashMap;
    using Deps::String;
#endif
} // namespace Fig

View File

@@ -0,0 +1,13 @@
#pragma once
#include <unordered_map>
namespace Fig::Deps
{
    /// Project-wide hash map alias. It is exactly std::unordered_map with the
    /// same defaults, so any std::unordered_map API can be used on it.
    ///
    /// The template parameter names deliberately avoid the `_Upper` spelling:
    /// identifiers starting with an underscore followed by an uppercase letter
    /// are reserved for the implementation.
    template <class Key,
              class Value,
              class Hash = std::hash<Key>,
              class KeyEqual = std::equal_to<Key>,
              class Allocator = std::allocator<std::pair<const Key, Value>>>
    using HashMap = std::unordered_map<Key, Value, Hash, KeyEqual, Allocator>;
} // namespace Fig::Deps

View File

@@ -0,0 +1,123 @@
#pragma once
namespace Fig::Deps
{
    /// Constexpr classification predicates over single code points (char32_t).
    ///
    /// The ASCII predicates are exact. The non-ASCII predicates are range-based
    /// approximations that cover a fixed list of blocks hard-coded below; they
    /// are not a full Unicode property database.
    class CharUtils
    {
    public:
        using U32 = char32_t;

        // ===== Basics =====

        /// True for values in [0, 0x10FFFF] that are not UTF-16 surrogates.
        static constexpr bool isValidScalar(U32 c) noexcept { return c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF); }
        static constexpr bool isAscii(U32 c) noexcept { return c <= 0x7F; }
        /// True for C0 controls (U+0000..U+001F), DEL (U+007F) and the C1
        /// controls (U+0080..U+009F).
        static constexpr bool isControl(U32 c) noexcept { return (c <= 0x1F) || (c >= 0x7F && c <= 0x9F); }
        /// Defined as "not a control character"; no other filtering is applied.
        static constexpr bool isPrintable(U32 c) noexcept { return !isControl(c); }

        // ===== ASCII classes =====

        static constexpr bool isAsciiLower(U32 c) noexcept { return c >= U'a' && c <= U'z'; }
        static constexpr bool isAsciiUpper(U32 c) noexcept { return c >= U'A' && c <= U'Z'; }
        static constexpr bool isAsciiAlpha(U32 c) noexcept { return isAsciiLower(c) || isAsciiUpper(c); }
        static constexpr bool isAsciiDigit(U32 c) noexcept { return c >= U'0' && c <= U'9'; }
        static constexpr bool isAsciiHexDigit(U32 c) noexcept
        {
            return isAsciiDigit(c) || (c >= U'a' && c <= U'f') || (c >= U'A' && c <= U'F');
        }
        /// Space plus the range U+0009..U+000D (tab, LF, VT, FF, CR).
        static constexpr bool isAsciiSpace(U32 c) noexcept { return c == U' ' || (c >= 0x09 && c <= 0x0D); }
        static constexpr bool isAsciiPunct(U32 c) noexcept
        {
            return (c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126);
        }

        // ===== Unicode White_Space =====

        static constexpr bool isSpace(U32 c) noexcept
        {
            if (isAscii(c)) return isAsciiSpace(c);
            switch (c)
            {
                case 0x0085:
                case 0x00A0:
                case 0x1680:
                case 0x2000:
                case 0x2001:
                case 0x2002:
                case 0x2003:
                case 0x2004:
                case 0x2005:
                case 0x2006:
                case 0x2007:
                case 0x2008:
                case 0x2009:
                case 0x200A:
                case 0x2028:
                case 0x2029:
                case 0x202F:
                case 0x205F:
                case 0x3000: return true;
            }
            return false;
        }

        // ===== Unicode decimal digits (fixed list of ranges) =====

        static constexpr bool isDigit(U32 c) noexcept
        {
            if (isAscii(c)) return isAsciiDigit(c);
            return (c >= 0x0660 && c <= 0x0669) || (c >= 0x06F0 && c <= 0x06F9) || (c >= 0x0966 && c <= 0x096F)
                   || (c >= 0x09E6 && c <= 0x09EF) || (c >= 0x0A66 && c <= 0x0A6F) || (c >= 0x0AE6 && c <= 0x0AEF)
                   || (c >= 0x0B66 && c <= 0x0B6F) || (c >= 0x0BE6 && c <= 0x0BEF) || (c >= 0x0C66 && c <= 0x0C6F)
                   || (c >= 0x0CE6 && c <= 0x0CEF) || (c >= 0x0D66 && c <= 0x0D6F) || (c >= 0x0E50 && c <= 0x0E59)
                   || (c >= 0x0ED0 && c <= 0x0ED9) || (c >= 0x0F20 && c <= 0x0F29) || (c >= 0x1040 && c <= 0x1049)
                   || (c >= 0x17E0 && c <= 0x17E9) || (c >= 0x1810 && c <= 0x1819) || (c >= 0xFF10 && c <= 0xFF19);
        }

        // ===== Unicode letters (block-range approximation) =====

        static constexpr bool isAlpha(U32 c) noexcept
        {
            if (isAscii(c)) return isAsciiAlpha(c);
            // The block ranges below also contain code points that are not
            // letters. Exclude the ones that matter for lexing: decimal digits
            // (otherwise isIdentifierStart would accept a digit) and the
            // multiplication / division signs inside the Latin-1 range.
            if (isDigit(c)) return false;
            if (c == 0x00D7 || c == 0x00F7) return false;
            return (c >= 0x00C0 && c <= 0x02AF) || (c >= 0x0370 && c <= 0x052F) || (c >= 0x0530 && c <= 0x058F)
                   || (c >= 0x0590 && c <= 0x05FF) || (c >= 0x0600 && c <= 0x06FF) || (c >= 0x0900 && c <= 0x097F)
                   || (c >= 0x3040 && c <= 0x30FF) || (c >= 0x3100 && c <= 0x312F) || (c >= 0x4E00 && c <= 0x9FFF)
                   || (c >= 0xAC00 && c <= 0xD7AF);
        }

        // ===== Punctuation / symbols / separators (engineering approximation) =====

        /// ASCII punctuation, or any code point in U+2000..U+206F. Note that
        /// this range overlaps with some code points accepted by isSpace.
        static constexpr bool isPunct(U32 c) noexcept
        {
            if (isAscii(c)) return isAsciiPunct(c);
            return (c >= 0x2000 && c <= 0x206F);
        }
        static constexpr bool isSymbol(U32 c) noexcept
        {
            return (c >= 0x20A0 && c <= 0x20CF) ||  // currency
                   (c >= 0x2100 && c <= 0x214F) ||  // letterlike
                   (c >= 0x2190 && c <= 0x21FF) ||  // arrows
                   (c >= 0x2600 && c <= 0x26FF) ||  // misc symbols
                   (c >= 0x1F300 && c <= 0x1FAFF);  // emoji block
        }

        // ===== Combinations =====

        static constexpr bool isAlnum(U32 c) noexcept { return isAlpha(c) || isDigit(c); }
        /// Hex digits are ASCII-only.
        static constexpr bool isHexDigit(U32 c) noexcept { return isAsciiHexDigit(c); }
        static constexpr bool isIdentifierStart(U32 c) noexcept { return isAlpha(c) || c == U'_'; }
        static constexpr bool isIdentifierContinue(U32 c) noexcept { return isAlnum(c) || c == U'_'; }
    };
} // namespace Fig::Deps

960
src/Deps/String/String.hpp Normal file
View File

@@ -0,0 +1,960 @@
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <format>
#include <functional>
#include <new>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
namespace Fig::Deps
{
    /// Stateless helpers used by String: ASCII detection and UTF-8 decoding.
    class StringUtils
    {
    public:
        /// Returns true when none of the first `n` bytes of `data` has its high bit set.
        static bool is_pure_ascii(const char *data, size_t n) noexcept
        {
            for (size_t i = 0; i < n; ++i)
            {
                if (static_cast<unsigned char>(data[i]) >= 128) return false;
            }
            return true;
        }

        /// Returns true when all of the first `n` code points of `data` are below 128.
        static bool is_pure_ascii(const char32_t *data, size_t n) noexcept
        {
            for (size_t i = 0; i < n; ++i)
            {
                if (data[i] >= 128) return false;
            }
            return true;
        }

        /// Decodes one code point from the UTF-8 bytes `s[0 .. n)`.
        ///
        /// @param s    pointer to the first byte of the sequence
        /// @param n    number of readable bytes starting at `s`
        /// @param out  receives the decoded code point, or U+FFFD when the
        ///             bytes at `s` are not a well-formed sequence
        /// @return number of bytes consumed: the sequence length on success,
        ///         1 for a malformed or truncated sequence (so callers can
        ///         resynchronise on the next byte), and 0 only when `n == 0`.
        ///
        /// A sequence is rejected when a continuation byte does not match
        /// 10xxxxxx, when the encoding is overlong, when it encodes a UTF-16
        /// surrogate, or when the value exceeds U+10FFFF.
        static size_t utf8_decode_one(const char *s, size_t n, char32_t &out)
        {
            out = 0xFFFD;
            if (s == nullptr || n == 0) return 0;

            const auto byte_at = [s](size_t i) { return static_cast<unsigned char>(s[i]); };
            const unsigned char lead = byte_at(0);
            if (lead < 0x80)
            {
                out = lead;
                return 1;
            }

            size_t seq_len = 0;
            char32_t cp = 0;
            char32_t min_cp = 0; // smallest value that legitimately needs seq_len bytes
            if ((lead & 0xE0) == 0xC0)
            {
                seq_len = 2;
                cp = static_cast<char32_t>(lead & 0x1F);
                min_cp = 0x80;
            }
            else if ((lead & 0xF0) == 0xE0)
            {
                seq_len = 3;
                cp = static_cast<char32_t>(lead & 0x0F);
                min_cp = 0x800;
            }
            else if ((lead & 0xF8) == 0xF0)
            {
                seq_len = 4;
                cp = static_cast<char32_t>(lead & 0x07);
                min_cp = 0x10000;
            }
            else
            {
                return 1; // stray continuation byte or invalid lead byte
            }

            if (n < seq_len) return 1; // truncated sequence
            for (size_t i = 1; i < seq_len; ++i)
            {
                const unsigned char cont = byte_at(i);
                if ((cont & 0xC0) != 0x80) return 1;
                cp = (cp << 6) | static_cast<char32_t>(cont & 0x3F);
            }

            const bool is_surrogate = cp >= 0xD800 && cp <= 0xDFFF;
            if (cp < min_cp || cp > 0x10FFFF || is_surrogate) return 1;

            out = cp;
            return seq_len;
        }
    };

    /// Code-point indexed string with three storage modes:
    ///   - ASCII_SSO: up to SSO_SIZE ASCII bytes stored inline (not NUL-terminated)
    ///   - ASCII_HEP: ASCII bytes in a std::vector<unsigned char>
    ///   - UTF32_HEP: code points in a std::vector<char32_t>
    ///
    /// `mode` selects the active union member. Every index and length in the
    /// public API counts code points, not bytes. A string only moves from an
    /// ASCII mode to UTF32_HEP (never back) when a non-ASCII code point is added.
    class String
    {
    public:
        using u32 = char32_t;
        static constexpr uint8_t SSO_SIZE = 22;
        /// Value returned by the search functions when nothing is found.
        static constexpr size_t npos = static_cast<size_t>(-1);
        enum class Mode : uint8_t
        {
            ASCII_SSO, // ASCII, inline buffer
            ASCII_HEP, // ASCII, heap vector
            UTF32_HEP, // UTF-32, heap vector
        };

    private:
        Mode mode = Mode::ASCII_SSO;
        union
        {
            unsigned char sso[SSO_SIZE]; // not NUL-terminated
            std::vector<unsigned char> ascii;
            std::vector<u32> utf32;
        };
        size_t _length = 0;

        bool is_ascii_mode() const noexcept { return mode != Mode::UTF32_HEP; }

        /// Pointer to the ASCII bytes; only meaningful in the two ASCII modes.
        const unsigned char *ascii_bytes() const noexcept
        {
            return (mode == Mode::ASCII_SSO) ? sso : ascii.data();
        }

        /// Destroys the active vector (if any) and falls back to ASCII_SSO so
        /// that calling destroy() twice in a row is harmless. `_length` is
        /// left for the caller to update.
        void destroy() noexcept
        {
            if (mode == Mode::ASCII_HEP) { ascii.~vector(); }
            else if (mode == Mode::UTF32_HEP)
            {
                utf32.~vector();
            }
            mode = Mode::ASCII_SSO;
        }

        /// Replaces the contents with a copy of `other`. The copy is built
        /// before the old contents are released, so a failed allocation
        /// leaves *this untouched.
        void copyfrom(const String &other)
        {
            if (this == &other) return;
            switch (other.mode)
            {
                case Mode::ASCII_SSO:
                    destroy();
                    std::memcpy(sso, other.sso, other._length);
                    break;
                case Mode::ASCII_HEP:
                {
                    std::vector<unsigned char> copy(other.ascii);
                    destroy();
                    new (&ascii) std::vector<unsigned char>(std::move(copy));
                    mode = Mode::ASCII_HEP;
                    break;
                }
                case Mode::UTF32_HEP:
                {
                    std::vector<u32> copy(other.utf32);
                    destroy();
                    new (&utf32) std::vector<u32>(std::move(copy));
                    mode = Mode::UTF32_HEP;
                    break;
                }
            }
            _length = other._length;
        }

        /// Steals the contents of `other`, leaving it as an empty SSO string.
        void movefrom(String &&other) noexcept
        {
            destroy();
            switch (other.mode)
            {
                case Mode::ASCII_SSO: std::memcpy(sso, other.sso, other._length); break;
                case Mode::ASCII_HEP:
                    new (&ascii) std::vector<unsigned char>(std::move(other.ascii));
                    mode = Mode::ASCII_HEP;
                    break;
                case Mode::UTF32_HEP:
                    new (&utf32) std::vector<u32>(std::move(other.utf32));
                    mode = Mode::UTF32_HEP;
                    break;
            }
            _length = other._length;
            other.destroy(); // ends the lifetime of the moved-from vector
            other._length = 0;
        }

        /// Widens ASCII storage to UTF-32. No-op when already UTF32_HEP.
        /// `min_capacity` lets callers pre-size the vector for an upcoming append.
        void ensure_utf32(size_t min_capacity = 0)
        {
            if (mode == Mode::UTF32_HEP) return;
            const unsigned char *src = ascii_bytes();
            std::vector<u32> widened;
            widened.reserve(min_capacity > _length ? min_capacity : _length);
            widened.assign(src, src + _length);
            destroy();
            new (&utf32) std::vector<u32>(std::move(widened));
            mode = Mode::UTF32_HEP;
        }

        /// Moves inline ASCII bytes into a heap vector. The bytes are copied
        /// out before the vector is constructed over the union storage.
        void promote_sso_ascii_to_heap(size_t min_capacity = 0)
        {
            assert(mode == Mode::ASCII_SSO && "promote_sso_ascii_to_heap: mode is not ascii sso");
            std::vector<unsigned char> heap;
            heap.reserve(min_capacity > _length ? min_capacity : _length);
            heap.assign(sso, sso + _length);
            new (&ascii) std::vector<unsigned char>(std::move(heap));
            mode = Mode::ASCII_HEP;
        }

        void init(const char *data)
        {
            assert(data);
            init(data, std::strlen(data));
        }

        /// Initialises from `n` bytes of UTF-8. Pure-ASCII input stays in an
        /// ASCII mode; anything else is decoded to UTF-32, with malformed
        /// bytes becoming U+FFFD.
        void init(const char *data, size_t n)
        {
            destroy();
            _length = 0;
            if (StringUtils::is_pure_ascii(data, n))
            {
                if (n <= SSO_SIZE)
                {
                    if (n != 0) std::memcpy(sso, data, n);
                }
                else
                {
                    new (&ascii) std::vector<unsigned char>(data, data + n);
                    mode = Mode::ASCII_HEP;
                }
                _length = n;
                return;
            }
            // UTF-8 decode; n bytes is an upper bound for the code point count.
            std::vector<u32> decoded;
            decoded.reserve(n);
            for (size_t i = 0; i < n;)
            {
                u32 cp = 0;
                const size_t step = StringUtils::utf8_decode_one(data + i, n - i, cp);
                decoded.push_back(cp);
                i += step;
            }
            decoded.shrink_to_fit();
            new (&utf32) std::vector<u32>(std::move(decoded));
            mode = Mode::UTF32_HEP;
            _length = utf32.size();
        }

        void init(const u32 *data)
        {
            assert(data);
            size_t n = 0;
            while (data[n] != 0) ++n;
            init(data, n);
        }

        /// Initialises from `n` UTF-32 code points, narrowing to ASCII storage
        /// when every code point is below 128.
        void init(const u32 *data, size_t n)
        {
            destroy();
            _length = 0;
            if (StringUtils::is_pure_ascii(data, n))
            {
                if (n <= SSO_SIZE)
                {
                    for (size_t i = 0; i < n; ++i) sso[i] = static_cast<unsigned char>(data[i]);
                }
                else
                {
                    std::vector<unsigned char> narrow;
                    narrow.reserve(n);
                    for (size_t i = 0; i < n; ++i) narrow.push_back(static_cast<unsigned char>(data[i]));
                    new (&ascii) std::vector<unsigned char>(std::move(narrow));
                    mode = Mode::ASCII_HEP;
                }
                _length = n;
                return;
            }
            new (&utf32) std::vector<u32>(data, data + n);
            mode = Mode::UTF32_HEP;
            _length = n;
        }

    public:
        size_t length() const noexcept { return _length; }
        size_t size() const noexcept { return _length; }
        bool empty() const noexcept { return _length == 0; }

        /// Reserves heap capacity; has no effect while the string is inline.
        void reserve(size_t n)
        {
            if (mode == Mode::ASCII_HEP)
                ascii.reserve(n);
            else if (mode == Mode::UTF32_HEP)
                utf32.reserve(n);
        }

        /// Empties the string without changing its storage mode.
        void clear() noexcept
        {
            _length = 0;
            if (mode == Mode::ASCII_HEP) { ascii.clear(); }
            else if (mode == Mode::UTF32_HEP)
            {
                utf32.clear();
            }
            // ASCII_SSO: nothing to release.
        }

        void shrink_to_fit() noexcept
        {
            if (mode == Mode::ASCII_HEP) { ascii.shrink_to_fit(); }
            else if (mode == Mode::UTF32_HEP)
            {
                utf32.shrink_to_fit();
            }
            // ASCII_SSO: the inline buffer has a fixed size.
        }

        ~String() noexcept { destroy(); }
        String() noexcept {}
        // Not noexcept: copying heap storage allocates and may throw.
        String(const String &other) { copyfrom(other); }
        String(String &&other) noexcept { movefrom(std::move(other)); }
        /// From a NUL-terminated UTF-8 string.
        String(const char *str) { init(str); }
        /// From a NUL-terminated UTF-32 string.
        String(const char32_t *str) { init(str); }
        /// From UTF-8 bytes held in a std::string (embedded NULs are kept).
        String(const std::string &s) { init(s.data(), s.size()); }

        /// Builds a string from a NUL-terminated buffer the caller guarantees
        /// to be pure ASCII; the bytes are not checked.
        static String fromPureAscii(const char *str)
        {
            assert(str);
            const size_t n = std::strlen(str);
            String result;
            if (n <= SSO_SIZE)
            {
                if (n != 0) std::memcpy(result.sso, str, n);
            }
            else
            {
                new (&result.ascii) std::vector<unsigned char>(str, str + n);
                result.mode = Mode::ASCII_HEP;
            }
            result._length = n;
            return result;
        }

        String &operator=(const String &other)
        {
            if (this != &other) copyfrom(other);
            return *this;
        }

        String &operator=(String &&other) noexcept
        {
            if (this != &other) movefrom(std::move(other));
            return *this;
        }

        /// Appends `rhs`. Stays in ASCII storage when both sides are ASCII,
        /// otherwise widens *this to UTF-32 first.
        String &operator+=(const String &rhs)
        {
            if (rhs._length == 0) return *this;
            if (this == &rhs)
            {
                // Appending a vector's own range to itself is not allowed; go through a copy.
                const String snapshot(rhs);
                return *this += snapshot;
            }
            if (is_ascii_mode() && rhs.is_ascii_mode())
            {
                const size_t newlen = _length + rhs._length;
                const unsigned char *src = rhs.ascii_bytes();
                if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE)
                {
                    std::memcpy(sso + _length, src, rhs._length);
                    _length = newlen;
                    return *this;
                }
                if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(newlen);
                ascii.insert(ascii.end(), src, src + rhs._length);
                _length = newlen;
                return *this;
            }
            // At least one side is UTF-32.
            ensure_utf32(_length + rhs._length);
            if (rhs.mode == Mode::UTF32_HEP) { utf32.insert(utf32.end(), rhs.utf32.begin(), rhs.utf32.end()); }
            else
            {
                const unsigned char *src = rhs.ascii_bytes();
                utf32.insert(utf32.end(), src, src + rhs._length);
            }
            _length = utf32.size();
            return *this;
        }

        String &operator+=(const char *utf8)
        {
            const String tmp(utf8);
            return (*this += tmp);
        }

        friend String operator+(String lhs, const String &rhs)
        {
            lhs += rhs;
            return lhs;
        }

        /// Appends one code point, widening to UTF-32 only when `cp >= 128`.
        void push_back(u32 cp)
        {
            if (cp < 128 && is_ascii_mode())
            {
                if (mode == Mode::ASCII_SSO && _length < SSO_SIZE)
                {
                    sso[_length++] = static_cast<unsigned char>(cp);
                    return;
                }
                if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(_length + 1);
                ascii.push_back(static_cast<unsigned char>(cp));
                ++_length;
                return;
            }
            ensure_utf32();
            utf32.push_back(cp);
            _length = utf32.size();
        }

        /// Removes the last code point. The string must not be empty.
        void pop_back()
        {
            assert(_length > 0);
            if (mode == Mode::ASCII_SSO)
            {
                --_length;
                return;
            }
            if (mode == Mode::ASCII_HEP)
            {
                ascii.pop_back();
                --_length;
                return;
            }
            utf32.pop_back();
            _length = utf32.size();
        }

        String &append(const char *utf8)
        {
            const String tmp(utf8);
            *this += tmp;
            return *this;
        }

        String &append(const char32_t *u32str)
        {
            const String tmp(u32str);
            *this += tmp;
            return *this;
        }

        /// Appends `count` copies of `cp`.
        String &append(size_t count, u32 cp)
        {
            for (size_t i = 0; i < count; ++i) push_back(cp);
            return *this;
        }

        /// Truncates to `new_size` code points, or pads with `fill` up to it.
        void resize(size_t new_size, u32 fill = 0)
        {
            if (new_size <= _length)
            {
                erase(new_size);
                return;
            }
            append(new_size - _length, fill);
        }

        u32 front() const
        {
            assert(_length > 0);
            return (*this)[0];
        }

        u32 back() const
        {
            assert(_length > 0);
            return (*this)[_length - 1];
        }

        /// Encodes the contents as UTF-8. Code points above U+10FFFF are skipped.
        std::string toStdString() const
        {
            if (_length == 0) return std::string();
            if (is_ascii_mode()) return std::string(reinterpret_cast<const char *>(ascii_bytes()), _length);

            std::string out;
            out.reserve(utf32.size());
            for (u32 cp : utf32)
            {
                if (cp <= 0x7F) { out.push_back(static_cast<char>(cp)); }
                else if (cp <= 0x7FF)
                {
                    out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
                    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
                }
                else if (cp <= 0xFFFF)
                {
                    out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
                    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
                    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
                }
                else if (cp <= 0x10FFFF)
                {
                    out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
                    out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
                    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
                    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
                }
                // else: invalid code point, nothing is emitted
            }
            return out;
        }

        friend std::ostream &operator<<(std::ostream &os, const String &s) { return os << s.toStdString(); }

        /// Compares by code points, independent of the storage mode of either side.
        friend bool operator==(const String &a, const String &b) noexcept
        {
            if (a._length != b._length) return false;
            if (a._length == 0) return true;
            if (a.is_ascii_mode() && b.is_ascii_mode())
                return std::memcmp(a.ascii_bytes(), b.ascii_bytes(), a._length) == 0;
            if (a.mode == Mode::UTF32_HEP && b.mode == Mode::UTF32_HEP) return a.utf32 == b.utf32;
            // One side ASCII, the other UTF-32: compare element-wise.
            const String &narrow = a.is_ascii_mode() ? a : b;
            const String &wide = a.is_ascii_mode() ? b : a;
            const unsigned char *bytes = narrow.ascii_bytes();
            for (size_t i = 0; i < narrow._length; ++i)
                if (static_cast<u32>(bytes[i]) != wide.utf32[i]) return false;
            return true;
        }

        friend bool operator!=(const String &a, const String &b) noexcept { return !(a == b); }

        // std::hash reads the storage directly.
        friend struct std::hash<String>;

        /// Read-only access to the code point at index `i` (asserted in range).
        u32 operator[](size_t i) const
        {
            assert(i < _length);
            return is_ascii_mode() ? static_cast<u32>(ascii_bytes()[i]) : utf32[i];
        }

        /// Checked access. @throws std::out_of_range when `i >= size()`.
        u32 at(size_t i) const
        {
            if (i >= _length) throw std::out_of_range("String::at");
            return (*this)[i];
        }

        bool starts_with(const String &prefix) const
        {
            if (prefix._length > _length) return false;
            for (size_t i = 0; i < prefix._length; ++i)
                if ((*this)[i] != prefix[i]) return false;
            return true;
        }

        bool ends_with(const String &suffix) const
        {
            if (suffix._length > _length) return false;
            const size_t offset = _length - suffix._length;
            for (size_t i = 0; i < suffix._length; ++i)
                if ((*this)[offset + i] != suffix[i]) return false;
            return true;
        }

        bool contains(u32 cp) const
        {
            for (size_t i = 0; i < _length; ++i)
                if ((*this)[i] == cp) return true;
            return false;
        }

        /// Returns up to `count` code points starting at `pos`; an empty
        /// string when `pos` is past the end. The result keeps UTF-32 storage
        /// if *this uses it.
        String substr(size_t pos, size_t count = size_t(-1)) const
        {
            if (pos >= _length) return String();
            const size_t len = (_length - pos < count) ? (_length - pos) : count;
            String out;
            if (is_ascii_mode())
            {
                const unsigned char *src = ascii_bytes() + pos;
                if (len <= SSO_SIZE) { std::memcpy(out.sso, src, len); }
                else
                {
                    new (&out.ascii) std::vector<unsigned char>(src, src + len);
                    out.mode = Mode::ASCII_HEP;
                }
            }
            else
            {
                new (&out.utf32) std::vector<u32>(utf32.begin() + pos, utf32.begin() + pos + len);
                out.mode = Mode::UTF32_HEP;
            }
            out._length = len;
            return out;
        }

        /// Removes up to `count` code points starting at `pos`; no-op when
        /// `pos` is past the end.
        String &erase(size_t pos, size_t count = size_t(-1))
        {
            if (pos >= _length) return *this;
            const size_t len = (_length - pos < count) ? (_length - pos) : count;
            if (mode == Mode::ASCII_SSO)
            {
                std::memmove(sso + pos, sso + pos + len, _length - pos - len);
                _length -= len;
                return *this;
            }
            if (mode == Mode::ASCII_HEP)
            {
                ascii.erase(ascii.begin() + pos, ascii.begin() + pos + len);
                _length -= len;
                return *this;
            }
            utf32.erase(utf32.begin() + pos, utf32.begin() + pos + len);
            _length = utf32.size();
            return *this;
        }

        /// Inserts `other` before index `pos` (clamped to the end).
        String &insert(size_t pos, const String &other)
        {
            if (pos > _length) pos = _length;
            if (other._length == 0) return *this;
            if (this == &other)
            {
                const String snapshot(other); // avoid inserting a range into itself
                return insert(pos, snapshot);
            }
            if (is_ascii_mode() && other.is_ascii_mode())
            {
                const size_t newlen = _length + other._length;
                const unsigned char *src = other.ascii_bytes();
                if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE)
                {
                    std::memmove(sso + pos + other._length, sso + pos, _length - pos);
                    std::memcpy(sso + pos, src, other._length);
                    _length = newlen;
                    return *this;
                }
                if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(newlen);
                ascii.insert(ascii.begin() + pos, src, src + other._length);
                _length = newlen;
                return *this;
            }
            // UTF-32 path: one range insert instead of one insert per element.
            ensure_utf32(_length + other._length);
            if (other.mode == Mode::UTF32_HEP)
                utf32.insert(utf32.begin() + pos, other.utf32.begin(), other.utf32.end());
            else
            {
                const unsigned char *src = other.ascii_bytes();
                utf32.insert(utf32.begin() + pos, src, src + other._length);
            }
            _length = utf32.size();
            return *this;
        }

        /// Three-way comparison by code point value, then by length.
        /// @return negative, zero or positive like std::string::compare.
        int compare(const String &other) const noexcept
        {
            const size_t n = (_length < other._length) ? _length : other._length;
            for (size_t i = 0; i < n; ++i)
            {
                const u32 a = (*this)[i];
                const u32 b = other[i];
                if (a != b) return (a < b) ? -1 : 1;
            }
            if (_length == other._length) return 0;
            return (_length < other._length) ? -1 : 1;
        }

        /// Index of the first occurrence of `needle` at or after `pos`, or npos.
        size_t find(const String &needle, size_t pos = 0) const
        {
            if (needle._length == 0) return pos <= _length ? pos : npos;
            if (needle._length > _length || pos >= _length) return npos;
            const size_t limit = _length - needle._length;
            for (size_t i = pos; i <= limit; ++i)
            {
                size_t j = 0;
                for (; j < needle._length; ++j)
                    if ((*this)[i + j] != needle[j]) break;
                if (j == needle._length) return i;
            }
            return npos;
        }

        /// Index of the first occurrence of the code point `cp` at or after `pos`, or npos.
        size_t find(u32 cp, size_t pos = 0) const
        {
            for (size_t i = pos; i < _length; ++i)
                if ((*this)[i] == cp) return i;
            return npos;
        }

        /// Index of the last occurrence of `needle`, or npos. An empty needle
        /// matches at size().
        size_t rfind(const String &needle) const
        {
            if (needle._length == 0) return _length;
            if (needle._length > _length) return npos;
            for (size_t i = _length - needle._length + 1; i-- > 0;)
            {
                size_t j = 0;
                for (; j < needle._length; ++j)
                    if ((*this)[i + j] != needle[j]) break;
                if (j == needle._length) return i;
            }
            return npos;
        }

        /// Replaces up to `len` code points starting at `pos` with `repl`;
        /// no-op when `pos` is at or past the end.
        String &replace(size_t pos, size_t len, const String &repl)
        {
            if (pos >= _length) return *this;
            if (this == &repl)
            {
                const String snapshot(repl); // avoid reading storage that is being modified
                return replace(pos, len, snapshot);
            }
            const size_t erase_len = (_length - pos < len) ? (_length - pos) : len;
            if (is_ascii_mode() && repl.is_ascii_mode())
            {
                const size_t newlen = _length - erase_len + repl._length;
                const unsigned char *src = repl.ascii_bytes();
                if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE)
                {
                    std::memmove(sso + pos + repl._length, sso + pos + erase_len, _length - pos - erase_len);
                    if (repl._length != 0) std::memcpy(sso + pos, src, repl._length);
                    _length = newlen;
                    return *this;
                }
                if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(newlen);
                ascii.erase(ascii.begin() + pos, ascii.begin() + pos + erase_len);
                ascii.insert(ascii.begin() + pos, src, src + repl._length);
                _length = newlen;
                return *this;
            }
            // UTF-32 path
            ensure_utf32();
            utf32.erase(utf32.begin() + pos, utf32.begin() + pos + erase_len);
            if (repl.mode == Mode::UTF32_HEP)
                utf32.insert(utf32.begin() + pos, repl.utf32.begin(), repl.utf32.end());
            else
            {
                const unsigned char *src = repl.ascii_bytes();
                utf32.insert(utf32.begin() + pos, src, src + repl._length);
            }
            _length = utf32.size();
            return *this;
        }
    };
} // namespace Fig::Deps
namespace std
{
// FNV-1a style hash over the code points of a Fig::Deps::String.
// Every storage mode feeds the same sequence of values into the hash, one per
// code point, so the result does not depend on how the string is stored.
template <>
struct hash<Fig::Deps::String>
{
    size_t operator()(const Fig::Deps::String &s) const noexcept
    {
        using Str = Fig::Deps::String;
        const size_t kOffsetBasis = 1469598103934665603ull;
        const size_t kPrime = 1099511628211ull;

        size_t digest = kOffsetBasis;
        // One FNV round: xor the unit in, then multiply by the prime.
        const auto mix = [&digest, kPrime](size_t unit) noexcept
        {
            digest ^= unit;
            digest *= kPrime;
        };

        switch (s.mode)
        {
            case Str::Mode::ASCII_SSO:
                for (size_t idx = 0; idx < s._length; ++idx) mix(s.sso[idx]);
                break;
            case Str::Mode::ASCII_HEP:
                for (const unsigned char byte : s.ascii) mix(byte);
                break;
            default: // UTF32_HEP
                for (const Str::u32 cp : s.utf32) mix(static_cast<size_t>(cp));
                break;
        }
        return digest;
    }
};
// std::format support for Fig::Deps::String: the value is written as its
// UTF-8 encoding. Custom format specifiers are not supported.
template <>
struct std::formatter<Fig::Deps::String, char>
{
    // Consumes nothing from the format spec.
    constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); }

    template <typename FormatContext>
    auto format(const Fig::Deps::String &s, FormatContext &ctx) const
    {
        const auto utf8 = s.toStdString();
        return std::format_to(ctx.out(), "{}", utf8);
    }
};
} // namespace std

View File

@@ -0,0 +1,137 @@
#include <cassert>
#include <iostream>
#include "String.hpp"
using Fig::Deps::String;
static void test_ascii_sso()
{
String s("hello");
assert(s.size() == 5);
assert(s[0] == U'h');
assert(s.toStdString() == "hello");
s.push_back(U'!');
assert(s.toStdString() == "hello!");
s.pop_back();
assert(s.toStdString() == "hello");
assert(s.starts_with("he"));
assert(s.ends_with("lo"));
assert(s.contains(U'e'));
}
// ASCII string longer than the inline buffer: append and search.
static void test_ascii_heap()
{
    String a("abcdefghijklmnopqrstuvwxyz"); // > SSO
    assert(a.size() == 26);
    String b("123");
    a += b;
    assert(a.ends_with("123"));
    // String::find takes a String needle; a bare char32_t does not convert to
    // String, so the needle is spelled as a one-character String.
    assert(a.find(String("1")) == 26);
}
static void test_utf8_decode()
{
String s("你好");
assert(s.size() == 2);
assert(s.toStdString() == "你好");
s.push_back(U'!');
assert(s.toStdString() == "你好!");
}
static void test_concat_modes()
{
String a("abc");
String b("你好");
String c = a + b;
assert(c.size() == 5);
assert(c.toStdString() == "abc你好");
String d = b + a;
assert(d.toStdString() == "你好abc");
}
static void test_substr_erase_insert()
{
String s("abcdef");
String sub = s.substr(2, 3);
assert(sub.toStdString() == "cde");
s.erase(2, 2);
assert(s.toStdString() == "abef");
s.insert(2, String("CD"));
assert(s.toStdString() == "abCDef");
}
static void test_replace()
{
String s("hello world");
s.replace(6, 5, String("Fig"));
assert(s.toStdString() == "hello Fig");
}
static void test_find_rfind()
{
String s("abcabcabc");
assert(s.find(String("abc")) == 0);
assert(s.find(String("abc"), 1) == 3);
assert(s.rfind(String("abc")) == 6);
}
static void test_compare()
{
String a("abc");
String b("abd");
String c("abc");
assert(a.compare(b) < 0);
assert(b.compare(a) > 0);
assert(a.compare(c) == 0);
assert(a == c);
assert(a != b);
}
static void test_resize_append()
{
String s("abc");
s.resize(5, U'x');
assert(s.toStdString() == "abcxx");
s.append(3, U'y');
assert(s.toStdString() == "abcxxyyy");
}
static void test_std_interop()
{
std::string stds = "hello";
String s(stds);
assert(s.toStdString() == "hello");
s += " world";
assert(s.toStdString() == "hello world");
}
// Runs every String test in declaration order, then reports success.
// A failing assert aborts the process before the message is printed.
int main()
{
    using TestFn = void (*)();
    const TestFn suite[] = {
        test_ascii_sso,
        test_ascii_heap,
        test_utf8_decode,
        test_concat_modes,
        test_substr_erase_insert,
        test_replace,
        test_find_rfind,
        test_compare,
        test_resize_append,
        test_std_interop,
    };
    for (const TestFn run_test : suite) run_test();
    std::cout << "All String tests passed.\n";
}