Files
Fig-TreeWalker/src/Utils/utils.hpp
PuqiAR 00240f1ed1 [Feat] 模块系统支持,使用 import x.x.x导入
[Fix] Context内部辅助函数修改, getStructName ....
[Feat] 增加字符串下标获取操作,和修改字符操作,实现使用了第四点的函数
[Impl] FString添加新方法 getRealChar, realReplace
[Fun] 在utf8_iterator中辱骂了C++
2025-12-26 20:47:57 +08:00

137 lines
3.8 KiB
C++

#pragma once
#include <Lexer/lexer.hpp>
#include <Core/fig_string.hpp>
#include <string>
#include <locale>
#include <cwctype>
#include <vector>
#include <algorithm>
namespace Fig::Utils
{
/// Splits `source` into lines, using '\n' as the only separator.
///
/// The separator itself is not stored in the returned lines, and no other
/// character (for example '\r') is treated specially. Empty lines that are
/// terminated by '\n' are kept. The text after the last '\n' is appended only
/// when it is non-empty, so a trailing newline does not produce an extra
/// empty entry.
///
/// @param source Text to split; iterated character by character via UTF8Iterator.
/// @return The lines in their original order.
inline std::vector<FString> splitSource(FString source)
{
    std::vector<FString> result;
    FString pending; // characters collected since the last '\n'

    for (UTF8Iterator cursor(source); !cursor.isEnd(); ++cursor)
    {
        UTF8Char ch = *cursor;
        if (ch == U'\n')
        {
            // End of line: store what was collected and start over.
            result.push_back(pending);
            pending = FString(u8"");
            continue;
        }
        pending += ch.getString();
    }

    // Flush the final line when the input does not end with '\n'.
    if (!pending.empty())
    {
        result.push_back(pending);
    }
    return result;
}
/// Decodes the UTF-8 bytes of `s` into a sequence of UTF-32 code points.
///
/// Malformed input is handled by skipping a single byte and resuming at the
/// next one. A byte is skipped when:
///   - it is not a valid lead byte (a stray continuation byte or 0xF8..0xFF),
///   - the sequence it announces would run past the end of `s`, or
///   - one of its trailing bytes is not a continuation byte (10xxxxxx).
/// Overlong encodings, surrogates and values above U+10FFFF are not rejected.
///
/// @param s UTF-8 encoded text.
/// @return The decoded code points, in order.
inline std::u32string utf8ToUtf32(const FString &s)
{
    std::u32string result;
    const size_t length = s.size();
    size_t i = 0;
    while (i < length)
    {
        const unsigned char lead = static_cast<unsigned char>(s[i]);

        // Classify the lead byte: total sequence length plus its payload bits.
        size_t seqLen = 0;
        char32_t codepoint = 0;
        if (lead < 0x80)
        {
            seqLen = 1;
            codepoint = lead;
        }
        else if ((lead >> 5) == 0x6)
        {
            seqLen = 2;
            codepoint = lead & 0x1F;
        }
        else if ((lead >> 4) == 0xE)
        {
            seqLen = 3;
            codepoint = lead & 0x0F;
        }
        else if ((lead >> 3) == 0x1E)
        {
            seqLen = 4;
            codepoint = lead & 0x07;
        }

        // Skip invalid lead bytes and sequences truncated by the end of the
        // string; without this check the trailing bytes would be read out of
        // bounds.
        if (seqLen == 0 || seqLen > length - i)
        {
            i += 1;
            continue;
        }

        // Fold in the continuation bytes, six payload bits each.
        bool wellFormed = true;
        for (size_t k = 1; k < seqLen; ++k)
        {
            const unsigned char trail = static_cast<unsigned char>(s[i + k]);
            if ((trail & 0xC0) != 0x80)
            {
                wellFormed = false;
                break;
            }
            codepoint = (codepoint << 6) | (trail & 0x3F);
        }
        if (!wellFormed)
        {
            // Drop only the lead byte so the following byte is re-examined.
            i += 1;
            continue;
        }

        result.push_back(codepoint);
        i += seqLen;
    }
    return result;
}
/// Encodes a sequence of UTF-32 code points as UTF-8 bytes.
///
/// Code points below 0x80 take one byte, below 0x800 two bytes, below 0x10000
/// three bytes, and everything else four bytes. The input values are not
/// validated.
///
/// @param s Code points to encode.
/// @return The concatenated UTF-8 encoding of every code point in `s`.
inline FString utf32ToUtf8(const std::u32string &s)
{
    FString encoded;

    // Appends one continuation byte (10xxxxxx) carrying the six bits of
    // `value` that start at bit position `shift`.
    const auto appendTrail = [&encoded](char32_t value, unsigned shift) {
        encoded.push_back(static_cast<char8_t>(((value >> shift) & 0x3F) | 0x80));
    };

    for (const char32_t codepoint : s)
    {
        if (codepoint < 0x80)
        {
            // Single byte: 0xxxxxxx
            encoded.push_back(static_cast<char8_t>(codepoint));
            continue;
        }
        if (codepoint < 0x800)
        {
            // Two bytes: 110xxxxx 10xxxxxx
            encoded.push_back(static_cast<char8_t>((codepoint >> 6) | 0xC0));
            appendTrail(codepoint, 0);
            continue;
        }
        if (codepoint < 0x10000)
        {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            encoded.push_back(static_cast<char8_t>((codepoint >> 12) | 0xE0));
            appendTrail(codepoint, 6);
            appendTrail(codepoint, 0);
            continue;
        }
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        encoded.push_back(static_cast<char8_t>((codepoint >> 18) | 0xF0));
        appendTrail(codepoint, 12);
        appendTrail(codepoint, 6);
        appendTrail(codepoint, 0);
    }
    return encoded;
}
/// Returns a copy of `s` in which every code point has been passed through
/// std::towlower.
///
/// The string is decoded to UTF-32, mapped one code point at a time, and
/// encoded back to UTF-8. std::towlower follows the current C locale
/// (LC_CTYPE), so which non-ASCII characters are affected depends on the
/// locale the program has installed.
///
/// @param s UTF-8 encoded text.
/// @return The converted text.
inline FString toLower(const FString &s)
{
    std::u32string u32 = utf8ToUtf32(s);
    for (char32_t &ch : u32)
    {
        // std::towlower takes a wint_t, which is narrower than char32_t on
        // some platforms. Leave code points that do not survive the
        // conversion untouched instead of mapping a truncated value.
        const std::wint_t wide = static_cast<std::wint_t>(ch);
        if (static_cast<char32_t>(wide) != ch)
        {
            continue;
        }
        ch = static_cast<char32_t>(std::towlower(wide));
    }
    return utf32ToUtf8(u32);
}
/// Returns a copy of `s` in which every code point has been passed through
/// std::towupper.
///
/// The string is decoded to UTF-32, mapped one code point at a time, and
/// encoded back to UTF-8. std::towupper follows the current C locale
/// (LC_CTYPE), so which non-ASCII characters are affected depends on the
/// locale the program has installed.
///
/// @param s UTF-8 encoded text.
/// @return The converted text.
inline FString toUpper(const FString &s)
{
    std::u32string u32 = utf8ToUtf32(s);
    for (char32_t &ch : u32)
    {
        // std::towupper takes a wint_t, which is narrower than char32_t on
        // some platforms. Leave code points that do not survive the
        // conversion untouched instead of mapping a truncated value.
        const std::wint_t wide = static_cast<std::wint_t>(ch);
        if (static_cast<char32_t>(wide) != ch)
        {
            continue;
        }
        ch = static_cast<char32_t>(std::towupper(wide));
    }
    return utf32ToUtf8(u32);
}
/// Reports whether `v` holds an element that compares equal to `t`.
///
/// The vector is taken by const reference so a call never copies it.
/// Performs a linear search using operator==.
///
/// @tparam T Element type; must be equality comparable.
/// @param t Value to look for.
/// @param v Vector to search.
/// @return true if at least one element equals `t`, false otherwise
///         (including when `v` is empty).
template <class T>
bool vectorContains(const T &t, const std::vector<T> &v)
{
    return std::find(v.begin(), v.end(), t) != v.end();
}
} // namespace Fig::Utils