diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0239152 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# Xmake cache +.xmake/ +build/ + +# MacOS Cache +.DS_Store + +.vscode +.VSCodeCounter \ No newline at end of file diff --git a/src/Core/Core.hpp b/src/Core/Core.hpp new file mode 100644 index 0000000..2d6b761 --- /dev/null +++ b/src/Core/Core.hpp @@ -0,0 +1,5 @@ +#pragma once + +#include +#include +#include \ No newline at end of file diff --git a/src/Core/CoreIO.cpp b/src/Core/CoreIO.cpp new file mode 100644 index 0000000..f1ca6b1 --- /dev/null +++ b/src/Core/CoreIO.cpp @@ -0,0 +1,30 @@ +#include +#include + +namespace Fig::CoreIO +{ +#if defined(_WIN32) || defined(__APPLE__) || defined (__linux__) || defined (__unix__) + std::ostream &GetStdOut() + { + static std::ostream &out = std::cout; + return out; + } + std::ostream &GetStdErr() + { + static std::ostream &err = std::cerr; + return err; + } + std::ostream &GetStdLog() + { + static std::ostream &log = std::clog; + return log; + } + std::istream &GetStdCin() + { + static std::istream &cin = std::cin; + return cin; + } +#else + // link +#endif +}; \ No newline at end of file diff --git a/src/Core/CoreIO.hpp b/src/Core/CoreIO.hpp new file mode 100644 index 0000000..3b5e337 --- /dev/null +++ b/src/Core/CoreIO.hpp @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace Fig::CoreIO +{ + std::ostream &GetStdOut(); + std::ostream &GetStdErr(); + std::ostream &GetStdLog(); + std::istream &GetStdCin(); +}; \ No newline at end of file diff --git a/src/Core/CoreInfos.hpp b/src/Core/CoreInfos.hpp new file mode 100644 index 0000000..07cefe3 --- /dev/null +++ b/src/Core/CoreInfos.hpp @@ -0,0 +1,60 @@ +#pragma once + +#include +#include +#include + +#define __FCORE_VERSION "0.5.0-alpha" + +#if defined(_WIN32) + #define __FCORE_PLATFORM "Windows" +#elif defined(__APPLE__) + #define __FCORE_PLATFORM "Apple" +#elif defined(__linux__) + #define __FCORE_PLATFORM "Linux" +#elif defined(__unix__) + 
#define __FCORE_PLATFORM "Unix" +#else + #define __FCORE_PLATFORM "Unknown" +#endif + +#if defined(__GNUC__) + #if defined(_WIN32) + #if defined(__clang__) + #define __FCORE_COMPILER "llvm-mingw" + #else + #define __FCORE_COMPILER "MinGW" + #endif + + #else + #define __FCORE_COMPILER "GCC" + #endif +#elif defined(__clang__) + #define __FCORE_COMPILER "Clang" +#elif defined(_MSC_VER) + #define __FCORE_COMPILER "MSVC" +#else + #define __FCORE_COMPILER "Unknown" +#endif + +#if SIZE_MAX == 18446744073709551615ull + #define __FCORE_ARCH "64" +#else + #define __FCORE_ARCH "86" +#endif + +#define __FCORE_LINK_DEPS + +namespace Fig +{ + namespace Core + { + inline constexpr std::string_view VERSION = __FCORE_VERSION; + inline constexpr std::string_view LICENSE = "MIT"; + inline constexpr std::string_view AUTHOR = "PuqiAR"; + inline constexpr std::string_view PLATFORM = __FCORE_PLATFORM; + inline constexpr std::string_view COMPILER = __FCORE_COMPILER; + inline constexpr std::string_view COMPILE_TIME = __FCORE_COMPILE_TIME; + inline constexpr std::string_view ARCH = __FCORE_ARCH; + }; // namespace Core +}; // namespace Fig \ No newline at end of file diff --git a/src/Core/RuntimeTime.cpp b/src/Core/RuntimeTime.cpp new file mode 100644 index 0000000..422473a --- /dev/null +++ b/src/Core/RuntimeTime.cpp @@ -0,0 +1,18 @@ +#include + +#include + +namespace Fig::Time +{ + Clock::time_point start_time; + void init() + { + static bool flag = false; + if (flag) + { + assert(false); + } + start_time = Clock::now(); + flag = true; + } +}; \ No newline at end of file diff --git a/src/Core/RuntimeTime.hpp b/src/Core/RuntimeTime.hpp new file mode 100644 index 0000000..0a69530 --- /dev/null +++ b/src/Core/RuntimeTime.hpp @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace Fig::Time +{ + using Clock = std::chrono::steady_clock; + extern Clock::time_point start_time; // since process start + void init(); +}; \ No newline at end of file diff --git a/src/Core/SourceLocations.hpp 
b/src/Core/SourceLocations.hpp new file mode 100644 index 0000000..2722cfd --- /dev/null +++ b/src/Core/SourceLocations.hpp @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +namespace Fig +{ + struct SourcePosition + { + size_t line, column, tok_length; + + SourcePosition() { line = column = tok_length = 0; } + SourcePosition(size_t _line, size_t _column, size_t _tok_length) + { + line = _line; + column = _column; + tok_length = _tok_length; + } + }; + + struct SourceLocation + { + SourcePosition sp; + + Deps::String fileName; + Deps::String packageName; + Deps::String functionName; + + SourceLocation() {} + SourceLocation(SourcePosition _sp, + Deps::String _fileName, + Deps::String _packageName, + Deps::String _functionName) + { + sp = std::move(_sp); + fileName = std::move(_fileName); + packageName = std::move(_packageName); + functionName = std::move(_functionName); + } + SourceLocation(size_t line, + size_t column, + size_t tok_length, + Deps::String _fileName, + Deps::String _packageName, + Deps::String _functionName) + { + sp = SourcePosition(line, column, tok_length); + fileName = std::move(_fileName); + packageName = std::move(_packageName); + functionName = std::move(_functionName); + } + }; +}; // namespace Fig \ No newline at end of file diff --git a/src/Deps/Deps.hpp b/src/Deps/Deps.hpp new file mode 100644 index 0000000..241b69b --- /dev/null +++ b/src/Deps/Deps.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include +#include +#include + +namespace Fig +{ + #ifdef __FCORE_LINK_DEPS + using Deps::String; + using Deps::HashMap; + #endif +}; \ No newline at end of file diff --git a/src/Deps/HashMap/HashMap.hpp b/src/Deps/HashMap/HashMap.hpp new file mode 100644 index 0000000..bc74f88 --- /dev/null +++ b/src/Deps/HashMap/HashMap.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace Fig::Deps +{ + template , + class _Pred = std::equal_to<_Key>, + class _Alloc = std::allocator >> + using HashMap = std::unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>; +}; 
\ No newline at end of file diff --git a/src/Deps/String/CharUtils.hpp b/src/Deps/String/CharUtils.hpp new file mode 100644 index 0000000..acf82bd --- /dev/null +++ b/src/Deps/String/CharUtils.hpp @@ -0,0 +1,123 @@ +#pragma once + +namespace Fig::Deps +{ + class CharUtils + { + public: + using U32 = char32_t; + + // ===== 基础 ===== + + static constexpr bool isValidScalar(U32 c) noexcept { return c <= 0x10FFFF && !(c >= 0xD800 && c <= 0xDFFF); } + + static constexpr bool isAscii(U32 c) noexcept { return c <= 0x7F; } + + static constexpr bool isControl(U32 c) noexcept { return (c <= 0x1F) || (c == 0x7F); } + + static constexpr bool isPrintable(U32 c) noexcept { return !isControl(c); } + + // ===== ASCII 分类 ===== + + static constexpr bool isAsciiLower(U32 c) noexcept { return c >= U'a' && c <= U'z'; } + static constexpr bool isAsciiUpper(U32 c) noexcept { return c >= U'A' && c <= U'Z'; } + static constexpr bool isAsciiAlpha(U32 c) noexcept { return isAsciiLower(c) || isAsciiUpper(c); } + static constexpr bool isAsciiDigit(U32 c) noexcept { return c >= U'0' && c <= U'9'; } + + static constexpr bool isAsciiHexDigit(U32 c) noexcept + { + return isAsciiDigit(c) || (c >= U'a' && c <= U'f') || (c >= U'A' && c <= U'F'); + } + + static constexpr bool isAsciiSpace(U32 c) noexcept { return c == U' ' || (c >= 0x09 && c <= 0x0D); } + + static constexpr bool isAsciiPunct(U32 c) noexcept + { + return (c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126); + } + + // ===== Unicode White_Space ===== + + static constexpr bool isSpace(U32 c) noexcept + { + if (isAscii(c)) return isAsciiSpace(c); + + switch (c) + { + case 0x0085: + case 0x00A0: + case 0x1680: + case 0x2000: + case 0x2001: + case 0x2002: + case 0x2003: + case 0x2004: + case 0x2005: + case 0x2006: + case 0x2007: + case 0x2008: + case 0x2009: + case 0x200A: + case 0x2028: + case 0x2029: + case 0x202F: + case 0x205F: + case 0x3000: return true; + } + return false; + } + + // ===== 
Unicode Decimal_Number ===== + + static constexpr bool isDigit(U32 c) noexcept + { + if (isAscii(c)) return isAsciiDigit(c); + + return (c >= 0x0660 && c <= 0x0669) || (c >= 0x06F0 && c <= 0x06F9) || (c >= 0x0966 && c <= 0x096F) + || (c >= 0x09E6 && c <= 0x09EF) || (c >= 0x0A66 && c <= 0x0A6F) || (c >= 0x0AE6 && c <= 0x0AEF) + || (c >= 0x0B66 && c <= 0x0B6F) || (c >= 0x0BE6 && c <= 0x0BEF) || (c >= 0x0C66 && c <= 0x0C6F) + || (c >= 0x0CE6 && c <= 0x0CEF) || (c >= 0x0D66 && c <= 0x0D6F) || (c >= 0x0E50 && c <= 0x0E59) + || (c >= 0x0ED0 && c <= 0x0ED9) || (c >= 0x0F20 && c <= 0x0F29) || (c >= 0x1040 && c <= 0x1049) + || (c >= 0x17E0 && c <= 0x17E9) || (c >= 0x1810 && c <= 0x1819) || (c >= 0xFF10 && c <= 0xFF19); + } + + // ===== Unicode Letter ===== + + static constexpr bool isAlpha(U32 c) noexcept + { + if (isAscii(c)) return isAsciiAlpha(c); + + return (c >= 0x00C0 && c <= 0x02AF) || (c >= 0x0370 && c <= 0x052F) || (c >= 0x0530 && c <= 0x058F) + || (c >= 0x0590 && c <= 0x05FF) || (c >= 0x0600 && c <= 0x06FF) || (c >= 0x0900 && c <= 0x097F) + || (c >= 0x3040 && c <= 0x30FF) || (c >= 0x3100 && c <= 0x312F) || (c >= 0x4E00 && c <= 0x9FFF) + || (c >= 0xAC00 && c <= 0xD7AF); + } + + // ===== 标点 / 符号 / 分隔符(工程近似)===== + + static constexpr bool isPunct(U32 c) noexcept + { + if (isAscii(c)) return isAsciiPunct(c); + return (c >= 0x2000 && c <= 0x206F); + } + + static constexpr bool isSymbol(U32 c) noexcept + { + return (c >= 0x20A0 && c <= 0x20CF) || // currency + (c >= 0x2100 && c <= 0x214F) || // letterlike + (c >= 0x2190 && c <= 0x21FF) || // arrows + (c >= 0x2600 && c <= 0x26FF) || // misc symbols + (c >= 0x1F300 && c <= 0x1FAFF); // emoji block + } + + // ===== 组合 ===== + + static constexpr bool isAlnum(U32 c) noexcept { return isAlpha(c) || isDigit(c); } + + static constexpr bool isHexDigit(U32 c) noexcept { return isAsciiHexDigit(c); } + + static constexpr bool isIdentifierStart(U32 c) noexcept { return isAlpha(c) || c == U'_'; } + + static constexpr bool 
isIdentifierContinue(U32 c) noexcept { return isAlnum(c) || c == U'_'; } + }; +}; \ No newline at end of file diff --git a/src/Deps/String/String.hpp b/src/Deps/String/String.hpp new file mode 100644 index 0000000..ab308cc --- /dev/null +++ b/src/Deps/String/String.hpp @@ -0,0 +1,960 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace Fig::Deps +{ + class StringUtils + { + public: + static bool is_pure_ascii(const char *data, size_t n) noexcept + { + for (size_t i = 0; i < n; ++i) + { + if (static_cast(data[i]) >= 128) return false; + } + return true; + } + + static bool is_pure_ascii(const char32_t *data, size_t n) noexcept + { + for (size_t i = 0; i < n; ++i) + { + if (data[i] >= 128) return false; + } + return true; + } + + static size_t utf8_decode_one(const char *s, size_t n, char32_t &out) + { + unsigned char c0 = static_cast(s[0]); + + if (c0 < 0x80) + { + out = c0; + return 1; + } + + if ((c0 >> 5) == 0x6 && n >= 2) + { + unsigned char c1 = static_cast(s[1]); + out = ((c0 & 0x1F) << 6) | (c1 & 0x3F); + return 2; + } + + if ((c0 >> 4) == 0xE && n >= 3) + { + unsigned char c1 = static_cast(s[1]); + unsigned char c2 = static_cast(s[2]); + out = ((c0 & 0x0F) << 12) | ((c1 & 0x3F) << 6) | (c2 & 0x3F); + return 3; + } + + if ((c0 >> 3) == 0x1E && n >= 4) + { + unsigned char c1 = static_cast(s[1]); + unsigned char c2 = static_cast(s[2]); + unsigned char c3 = static_cast(s[3]); + out = ((c0 & 0x07) << 18) | ((c1 & 0x3F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F); + return 4; + } + + out = 0xFFFD; + return 1; + } + }; + + class String + { + public: + using u32 = char32_t; + static constexpr uint8_t SSO_SIZE = 22; + + enum class Mode : uint8_t + { + ASCII_SSO, // ASCII + ASCII_HEP, // ASCII heap + UTF32_HEP, // UTF32 heap + }; + + private: + Mode mode = Mode::ASCII_SSO; + union + { + unsigned char sso[SSO_SIZE]; // non null terminate + std::vector ascii; + std::vector utf32; + }; + + size_t _length = 0; + + void copyfrom(const 
String &other) + { + destroy(); + _length = other._length; + mode = other.mode; + + if (mode == Mode::ASCII_SSO) { memcpy(sso, other.sso, sizeof(unsigned char) * _length); } + else if (mode == Mode::ASCII_HEP) { new (&ascii) std::vector(other.ascii); } + else + { + new (&utf32) std::vector(other.utf32); + } + } + + void movefrom(String &&other) noexcept + { + destroy(); + + mode = other.mode; + _length = other._length; + + switch (mode) + { + case Mode::ASCII_SSO: std::memcpy(sso, other.sso, other._length); break; + + case Mode::ASCII_HEP: new (&ascii) std::vector(std::move(other.ascii)); break; + + case Mode::UTF32_HEP: new (&utf32) std::vector(std::move(other.utf32)); break; + } + + other.mode = Mode::ASCII_SSO; + other._length = 0; + } + + void destroy() noexcept + { + if (mode == Mode::ASCII_SSO) + { + // pass + } + if (mode == Mode::ASCII_HEP) { ascii.~vector(); } + if (mode == Mode::UTF32_HEP) { utf32.~vector(); } + } + + void ensure_utf32() + { + if (mode == Mode::UTF32_HEP) return; + + std::vector tmp; + tmp.reserve(_length); + + if (mode == Mode::ASCII_SSO) + { + for (size_t i = 0; i < _length; ++i) tmp.push_back(static_cast(sso[i])); + } + else // ASCII_HEP + { + for (unsigned char c : ascii) tmp.push_back(static_cast(c)); + } + + destroy(); + mode = Mode::UTF32_HEP; + new (&utf32) std::vector(std::move(tmp)); + } + + void promote_sso_ascii_to_heap() noexcept + { + assert(mode == Mode::ASCII_SSO && "promote_sso_ascii_to_heap: mode is not ascii sso"); + mode = Mode::ASCII_HEP; + + std::vector tmp; + tmp.reserve(_length); + for (size_t i = 0; i < _length; ++i) tmp.push_back(sso[i]); + + mode = Mode::ASCII_HEP; + new (&ascii) std::vector(std::move(tmp)); + } + + void init(const char *data) + { + assert(data); + size_t n = std::strlen(data); + init(data, n); + } + + void init(const char *data, size_t n) + { + destroy(); + + _length = 0; + + // ASCII 快路径 + if (n <= SSO_SIZE && StringUtils::is_pure_ascii(data, n)) + { + mode = Mode::ASCII_SSO; + 
std::memcpy(sso, data, n); + _length = n; + return; + } + + if (StringUtils::is_pure_ascii(data, n)) + { + mode = Mode::ASCII_HEP; + new (&ascii) std::vector(data, data + n); + _length = n; + return; + } + + // UTF-8 decode + mode = Mode::UTF32_HEP; + new (&utf32) std::vector(); + utf32.reserve(n); + + for (size_t i = 0; i < n;) + { + u32 cp; + size_t step = StringUtils::utf8_decode_one(data + i, n - i, cp); + utf32.push_back(cp); + i += step; + } + + utf32.shrink_to_fit(); + _length = utf32.size(); + } + + void init(const u32 *data) + { + assert(data); + size_t n = 0; + while (data[n] != 0) ++n; + init(data, n); + } + + void init(const u32 *data, size_t n) + { + destroy(); + + _length = n; + + if (n <= SSO_SIZE && StringUtils::is_pure_ascii(data, n)) + { + mode = Mode::ASCII_SSO; + for (size_t i = 0; i < n; ++i) sso[i] = static_cast(data[i]); + return; + } + + if (StringUtils::is_pure_ascii(data, n)) + { + mode = Mode::ASCII_HEP; + new (&ascii) std::vector(); + ascii.reserve(n); + for (size_t i = 0; i < n; ++i) ascii.push_back(static_cast(data[i])); + return; + } + + mode = Mode::UTF32_HEP; + new (&utf32) std::vector(); + utf32.assign(data, data + n); + } + + public: + size_t length() const noexcept { return _length; } + size_t size() const noexcept { return _length; } + + bool empty() const noexcept { return _length == 0; } + void reserve(size_t n) + { + if (mode == Mode::ASCII_HEP) + ascii.reserve(n); + else if (mode == Mode::UTF32_HEP) + utf32.reserve(n); + } + + void clear() noexcept + { + _length = 0; + if (mode == Mode::ASCII_SSO) + { + // pass + } + if (mode == Mode::ASCII_HEP) { ascii.clear(); } + else + { + utf32.clear(); + } + } + + void shrink_to_fit() noexcept + { + if (mode == Mode::ASCII_HEP) { ascii.shrink_to_fit(); } + else + { + utf32.shrink_to_fit(); + } + } + + ~String() noexcept { destroy(); } + String() noexcept + { + mode = Mode::ASCII_SSO; + _length = 0; + } + String(const String &other) noexcept { copyfrom(other); } + String(String &&other) 
noexcept { movefrom(std::move(other)); } + String(const char *str) { init(str); } + String(const char32_t *str) { init(str); } + String(const std::string &s) { init(s.data(), s.size()); } + + static String fromPureAscii(const char *str) + { + String string; + string._length = std::strlen(str); + if (string._length <= SSO_SIZE) { memcpy(string.sso, str, string._length); } + else + { + string.ascii.reserve(string._length); + for (size_t i = 0; i < string._length; ++i) { string.ascii.push_back(str[i]); } + } + + return string; + } + + String &operator=(const String &other) + { + if (this != &other) + { + destroy(); + copyfrom(other); + } + return *this; + } + + String &operator=(String &&other) noexcept + { + if (this != &other) movefrom(std::move(other)); + return *this; + } + + String &operator+=(const String &rhs) + { + if (rhs._length == 0) return *this; + + // 两边都是 ASCII + bool this_ascii = (mode == Mode::ASCII_SSO || mode == Mode::ASCII_HEP); + bool rhs_ascii = (rhs.mode == Mode::ASCII_SSO || rhs.mode == Mode::ASCII_HEP); + + if (this_ascii && rhs_ascii) + { + size_t newlen = _length + rhs._length; + + // SSO 可容纳 + if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE) + { + if (rhs.mode == Mode::ASCII_SSO) + std::memcpy(sso + _length, rhs.sso, rhs._length); + else + std::memcpy(sso + _length, rhs.ascii.data(), rhs._length); + + _length = newlen; + return *this; + } + + if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(); + + // 追加 + if (rhs.mode == Mode::ASCII_SSO) + ascii.insert(ascii.end(), rhs.sso, rhs.sso + rhs._length); + else + ascii.insert(ascii.end(), rhs.ascii.begin(), rhs.ascii.end()); + + _length = newlen; + return *this; + } + + // 必须 UTF32 + + if (mode != Mode::UTF32_HEP) + { + std::vector tmp; + tmp.reserve(_length + rhs._length); + + if (mode == Mode::ASCII_SSO) + { + for (size_t i = 0; i < _length; ++i) tmp.push_back(static_cast(sso[i])); + } + else // ASCII_HEP + { + for (unsigned char c : ascii) tmp.push_back(static_cast(c)); + } + + 
destroy(); + mode = Mode::UTF32_HEP; + new (&utf32) std::vector(std::move(tmp)); + } + + if (rhs.mode == Mode::UTF32_HEP) { utf32.insert(utf32.end(), rhs.utf32.begin(), rhs.utf32.end()); } + else if (rhs.mode == Mode::ASCII_SSO) + { + for (size_t i = 0; i < rhs._length; ++i) utf32.push_back(static_cast(rhs.sso[i])); + } + else // ASCII_HEP + { + for (unsigned char c : rhs.ascii) utf32.push_back(static_cast(c)); + } + + _length = utf32.size(); + return *this; + } + + String &operator+=(const char *utf8) + { + String tmp(utf8); + return (*this += tmp); + } + + friend String operator+(String lhs, const String &rhs) + { + lhs += rhs; + return lhs; + } + + void push_back(u32 cp) + { + if (cp < 128) + { + if (mode == Mode::ASCII_SSO && _length < SSO_SIZE) + { + sso[_length++] = static_cast(cp); + return; + } + + if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(); + + if (mode == Mode::ASCII_HEP) + { + ascii.push_back(static_cast(cp)); + ++_length; + return; + } + } + + ensure_utf32(); + utf32.push_back(cp); + _length = utf32.size(); + } + + void pop_back() + { + assert(_length > 0); + + if (mode == Mode::ASCII_SSO) + { + --_length; + return; + } + + if (mode == Mode::ASCII_HEP) + { + ascii.pop_back(); + --_length; + return; + } + + utf32.pop_back(); + _length = utf32.size(); + } + + String &append(const char *utf8) + { + String tmp(utf8); + *this += tmp; + return *this; + } + + String &append(const char32_t *u32str) + { + String tmp(u32str); + *this += tmp; + return *this; + } + + String &append(size_t count, u32 cp) + { + for (size_t i = 0; i < count; ++i) push_back(cp); + return *this; + } + + void resize(size_t new_size, u32 fill = 0) + { + if (new_size <= _length) + { + erase(new_size); + return; + } + + append(new_size - _length, fill); + } + + u32 front() const + { + assert(_length > 0); + return (*this)[0]; + } + + u32 back() const + { + assert(_length > 0); + return (*this)[_length - 1]; + } + + std::string toStdString() const + { + std::string out; + + if 
(mode == Mode::ASCII_SSO) + { + out.assign(reinterpret_cast(sso), _length); + return out; + } + + if (mode == Mode::ASCII_HEP) + { + out.assign(ascii.begin(), ascii.end()); + return out; + } + + // UTF32_HEP -> UTF-8 encode + for (u32 cp : utf32) + { + if (cp <= 0x7F) { out.push_back(static_cast(cp)); } + else if (cp <= 0x7FF) + { + out.push_back(static_cast(0xC0 | (cp >> 6))); + out.push_back(static_cast(0x80 | (cp & 0x3F))); + } + else if (cp <= 0xFFFF) + { + out.push_back(static_cast(0xE0 | (cp >> 12))); + out.push_back(static_cast(0x80 | ((cp >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (cp & 0x3F))); + } + else if (cp <= 0x10FFFF) + { + out.push_back(static_cast(0xF0 | (cp >> 18))); + out.push_back(static_cast(0x80 | ((cp >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((cp >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (cp & 0x3F))); + } + // 非法码点 + } + + return out; + } + friend std::ostream &operator<<(std::ostream &os, const String &s) { return os << s.toStdString(); } + + friend bool operator==(const String &a, const String &b) noexcept + { + if (a._length != b._length) return false; + + // 同模式 + if (a.mode == b.mode) + { + if (a.mode == Mode::ASCII_SSO) return std::memcmp(a.sso, b.sso, a._length) == 0; + + if (a.mode == Mode::ASCII_HEP) return a.ascii == b.ascii; + + return a.utf32 == b.utf32; + } + + // 不同模式ASCII / UTF32 + const bool a_ascii = (a.mode == Mode::ASCII_SSO || a.mode == Mode::ASCII_HEP); + const bool b_ascii = (b.mode == Mode::ASCII_SSO || b.mode == Mode::ASCII_HEP); + + if (a_ascii && b_ascii) + { + if (a.mode == Mode::ASCII_SSO) + return std::memcmp(a.sso, b.ascii.data(), a._length) == 0; + else + return std::memcmp(a.ascii.data(), b.sso, a._length) == 0; + } + + // ASCII / UTF32 + const String &ascii_str = a_ascii ? a : b; + const String &utf32_str = a_ascii ? 
b : a; + + if (ascii_str.mode == Mode::ASCII_SSO) + { + for (size_t i = 0; i < ascii_str._length; ++i) + if (static_cast(ascii_str.sso[i]) != utf32_str.utf32[i]) return false; + } + else + { + for (size_t i = 0; i < ascii_str._length; ++i) + if (static_cast(ascii_str.ascii[i]) != utf32_str.utf32[i]) return false; + } + + return true; + } + + friend bool operator!=(const String &a, const String &b) noexcept { return !(a == b); } + // std::hash + friend struct std::hash; + + // read only + u32 operator[](size_t i) const + { + assert(i < _length); + + if (mode == Mode::ASCII_SSO) return static_cast(sso[i]); + if (mode == Mode::ASCII_HEP) return static_cast(ascii[i]); + return utf32[i]; + } + u32 at(size_t i) const + { + if (i >= _length) throw std::out_of_range("String::at"); + return (*this)[i]; + } + + bool starts_with(const String &prefix) const + { + if (prefix._length > _length) return false; + + for (size_t i = 0; i < prefix._length; ++i) + if ((*this)[i] != prefix[i]) return false; + + return true; + } + + bool ends_with(const String &suffix) const + { + if (suffix._length > _length) return false; + + size_t offset = _length - suffix._length; + + for (size_t i = 0; i < suffix._length; ++i) + if ((*this)[offset + i] != suffix[i]) return false; + + return true; + } + + bool contains(u32 cp) const + { + if (mode == Mode::ASCII_SSO) + { + for (size_t i = 0; i < _length; ++i) + if (sso[i] == cp) return true; + return false; + } + + if (mode == Mode::ASCII_HEP) + { + if (cp >= 128) return false; + for (unsigned char c : ascii) + if (c == cp) return true; + return false; + } + + for (u32 c : utf32) + if (c == cp) return true; + + return false; + } + + String substr(size_t pos, size_t count = size_t(-1)) const + { + if (pos >= _length) return String(); + + size_t len = (_length - pos < count) ? 
(_length - pos) : count; + + String out; + + // ASCII_SSO + if (mode == Mode::ASCII_SSO) + { + if (len <= SSO_SIZE) + { + out.mode = Mode::ASCII_SSO; + std::memcpy(out.sso, sso + pos, len); + out._length = len; + } + else + { + out.mode = Mode::ASCII_HEP; + new (&out.ascii) std::vector(sso + pos, sso + pos + len); + out._length = len; + } + return out; + } + + // ASCII_HEP + if (mode == Mode::ASCII_HEP) + { + if (len <= SSO_SIZE) + { + out.mode = Mode::ASCII_SSO; + std::memcpy(out.sso, ascii.data() + pos, len); + out._length = len; + } + else + { + out.mode = Mode::ASCII_HEP; + new (&out.ascii) std::vector(ascii.begin() + pos, ascii.begin() + pos + len); + out._length = len; + } + return out; + } + + // UTF32 + out.mode = Mode::UTF32_HEP; + new (&out.utf32) std::vector(utf32.begin() + pos, utf32.begin() + pos + len); + out._length = len; + return out; + } + + String &erase(size_t pos, size_t count = size_t(-1)) + { + if (pos >= _length) return *this; + + size_t len = (_length - pos < count) ? 
(_length - pos) : count; + + if (mode == Mode::ASCII_SSO) + { + std::memmove(sso + pos, sso + pos + len, _length - pos - len); + _length -= len; + return *this; + } + + if (mode == Mode::ASCII_HEP) + { + ascii.erase(ascii.begin() + pos, ascii.begin() + pos + len); + _length -= len; + return *this; + } + + utf32.erase(utf32.begin() + pos, utf32.begin() + pos + len); + _length = utf32.size(); + return *this; + } + + String &insert(size_t pos, const String &other) + { + if (pos > _length) pos = _length; + if (other._length == 0) return *this; + + bool this_ascii = (mode != Mode::UTF32_HEP); + bool other_ascii = (other.mode != Mode::UTF32_HEP); + + // ASCII 合并路径 + if (this_ascii && other_ascii) + { + size_t newlen = _length + other._length; + + if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE) + { + std::memmove(sso + pos + other._length, sso + pos, _length - pos); + + if (other.mode == Mode::ASCII_SSO) + std::memcpy(sso + pos, other.sso, other._length); + else + std::memcpy(sso + pos, other.ascii.data(), other._length); + + _length = newlen; + return *this; + } + + if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(); + + if (other.mode == Mode::ASCII_SSO) + ascii.insert(ascii.begin() + pos, other.sso, other.sso + other._length); + else + ascii.insert(ascii.begin() + pos, other.ascii.begin(), other.ascii.end()); + + _length = newlen; + return *this; + } + + // UTF32 路径 + ensure_utf32(); + + if (other.mode == Mode::UTF32_HEP) + utf32.insert(utf32.begin() + pos, other.utf32.begin(), other.utf32.end()); + else if (other.mode == Mode::ASCII_SSO) + for (size_t i = 0; i < other._length; ++i) + utf32.insert(utf32.begin() + pos + i, static_cast(other.sso[i])); + else + for (size_t i = 0; i < other._length; ++i) + utf32.insert(utf32.begin() + pos + i, static_cast(other.ascii[i])); + + _length = utf32.size(); + return *this; + } + + int compare(const String &other) const noexcept + { + size_t n = (_length < other._length) ? 
_length : other._length; + + for (size_t i = 0; i < n; ++i) + { + u32 a = (*this)[i]; + u32 b = other[i]; + if (a != b) return (a < b) ? -1 : 1; + } + + if (_length == other._length) return 0; + return (_length < other._length) ? -1 : 1; + } + + size_t find(const String &needle, size_t pos = 0) const + { + if (needle._length == 0) return pos <= _length ? pos : size_t(-1); + if (needle._length > _length || pos >= _length) return size_t(-1); + + size_t limit = _length - needle._length; + + for (size_t i = pos; i <= limit; ++i) + { + size_t j = 0; + for (; j < needle._length; ++j) + if ((*this)[i + j] != needle[j]) break; + + if (j == needle._length) return i; + } + + return size_t(-1); + } + + size_t rfind(const String &needle) const + { + if (needle._length == 0) return _length; + if (needle._length > _length) return size_t(-1); + + for (size_t i = _length - needle._length + 1; i-- > 0;) + { + size_t j = 0; + for (; j < needle._length; ++j) + if ((*this)[i + j] != needle[j]) break; + + if (j == needle._length) return i; + } + + return size_t(-1); + } + + String &replace(size_t pos, size_t len, const String &repl) + { + if (pos >= _length) return *this; + + size_t erase_len = (_length - pos < len) ? 
(_length - pos) : len; + + // ASCII路径 + bool this_ascii = (mode != Mode::UTF32_HEP); + bool repl_ascii = (repl.mode != Mode::UTF32_HEP); + + if (this_ascii && repl_ascii) + { + size_t newlen = _length - erase_len + repl._length; + + // SSO容纳 + if (mode == Mode::ASCII_SSO && newlen <= SSO_SIZE) + { + std::memmove(sso + pos + repl._length, sso + pos + erase_len, _length - pos - erase_len); + + if (repl.mode == Mode::ASCII_SSO) + std::memcpy(sso + pos, repl.sso, repl._length); + else + std::memcpy(sso + pos, repl.ascii.data(), repl._length); + + _length = newlen; + return *this; + } + + if (mode == Mode::ASCII_SSO) promote_sso_ascii_to_heap(); + + ascii.erase(ascii.begin() + pos, ascii.begin() + pos + erase_len); + + if (repl.mode == Mode::ASCII_SSO) + ascii.insert(ascii.begin() + pos, repl.sso, repl.sso + repl._length); + else + ascii.insert(ascii.begin() + pos, repl.ascii.begin(), repl.ascii.end()); + + _length = newlen; + return *this; + } + + // UTF32路径 + ensure_utf32(); + + utf32.erase(utf32.begin() + pos, utf32.begin() + pos + erase_len); + + if (repl.mode == Mode::UTF32_HEP) + utf32.insert(utf32.begin() + pos, repl.utf32.begin(), repl.utf32.end()); + else if (repl.mode == Mode::ASCII_SSO) + for (size_t i = 0; i < repl._length; ++i) + utf32.insert(utf32.begin() + pos + i, static_cast(repl.sso[i])); + else + for (size_t i = 0; i < repl._length; ++i) + utf32.insert(utf32.begin() + pos + i, static_cast(repl.ascii[i])); + + _length = utf32.size(); + return *this; + } + }; +}; // namespace Fig::Deps + +namespace std +{ + template <> + struct hash + { + size_t operator()(const Fig::Deps::String &s) const noexcept + { + using String = Fig::Deps::String; + using u32 = String::u32; + + const size_t FNV_offset = 1469598103934665603ull; + const size_t FNV_prime = 1099511628211ull; + + size_t h = FNV_offset; + + if (s.mode == String::Mode::ASCII_SSO) + { + for (size_t i = 0; i < s._length; ++i) + { + h ^= s.sso[i]; + h *= FNV_prime; + } + return h; + } + + if (s.mode == 
String::Mode::ASCII_HEP) + { + for (unsigned char c : s.ascii) + { + h ^= c; + h *= FNV_prime; + } + return h; + } + + // UTF32 + for (u32 cp : s.utf32) + { + h ^= static_cast(cp); + h *= FNV_prime; + } + + return h; + } + }; + + template <> + struct std::formatter + { + // 不支持自定义格式说明符 + constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); } + + template + auto format(const Fig::Deps::String &s, FormatContext &ctx) const + { + return std::format_to(ctx.out(), "{}", s.toStdString()); + } + }; + +} // namespace std diff --git a/src/Deps/String/StringTest.cpp b/src/Deps/String/StringTest.cpp new file mode 100644 index 0000000..a633859 --- /dev/null +++ b/src/Deps/String/StringTest.cpp @@ -0,0 +1,137 @@ +#include +#include +#include "String.hpp" + +using Fig::Deps::String; + +static void test_ascii_sso() +{ + String s("hello"); + assert(s.size() == 5); + assert(s[0] == U'h'); + assert(s.toStdString() == "hello"); + + s.push_back(U'!'); + assert(s.toStdString() == "hello!"); + + s.pop_back(); + assert(s.toStdString() == "hello"); + + assert(s.starts_with("he")); + assert(s.ends_with("lo")); + assert(s.contains(U'e')); +} + +static void test_ascii_heap() +{ + String a("abcdefghijklmnopqrstuvwxyz"); // > SSO + assert(a.size() == 26); + + String b("123"); + a += b; + + assert(a.ends_with("123")); + assert(a.find(U'1') == 26); +} + +static void test_utf8_decode() +{ + String s("你好"); + assert(s.size() == 2); + assert(s.toStdString() == "你好"); + + s.push_back(U'!'); + assert(s.toStdString() == "你好!"); +} + +static void test_concat_modes() +{ + String a("abc"); + String b("你好"); + + String c = a + b; + assert(c.size() == 5); + assert(c.toStdString() == "abc你好"); + + String d = b + a; + assert(d.toStdString() == "你好abc"); +} + +static void test_substr_erase_insert() +{ + String s("abcdef"); + + String sub = s.substr(2, 3); + assert(sub.toStdString() == "cde"); + + s.erase(2, 2); + assert(s.toStdString() == "abef"); + + s.insert(2, String("CD")); + 
assert(s.toStdString() == "abCDef"); +} + +static void test_replace() +{ + String s("hello world"); + s.replace(6, 5, String("Fig")); + assert(s.toStdString() == "hello Fig"); +} + +static void test_find_rfind() +{ + String s("abcabcabc"); + + assert(s.find(String("abc")) == 0); + assert(s.find(String("abc"), 1) == 3); + assert(s.rfind(String("abc")) == 6); +} + +static void test_compare() +{ + String a("abc"); + String b("abd"); + String c("abc"); + + assert(a.compare(b) < 0); + assert(b.compare(a) > 0); + assert(a.compare(c) == 0); + assert(a == c); + assert(a != b); +} + +static void test_resize_append() +{ + String s("abc"); + s.resize(5, U'x'); + assert(s.toStdString() == "abcxx"); + + s.append(3, U'y'); + assert(s.toStdString() == "abcxxyyy"); +} + +static void test_std_interop() +{ + std::string stds = "hello"; + String s(stds); + assert(s.toStdString() == "hello"); + + s += " world"; + assert(s.toStdString() == "hello world"); +} + +int main() +{ + test_ascii_sso(); + test_ascii_heap(); + test_utf8_decode(); + test_concat_modes(); + test_substr_erase_insert(); + test_replace(); + test_find_rfind(); + test_compare(); + test_resize_append(); + test_std_interop(); + + std::cout << "All String tests passed.\n"; +} diff --git a/src/Error/Error.cpp b/src/Error/Error.cpp new file mode 100644 index 0000000..52742bd --- /dev/null +++ b/src/Error/Error.cpp @@ -0,0 +1,149 @@ +#include +#include + +#include + +namespace Fig +{ + void ColoredPrint(const char *color, const char *msg, std::ostream &ost = CoreIO::GetStdErr()) + { + ost << color << msg << TerminalColors::Reset; + } + + void ColoredPrint(const char *color, const std::string &msg, std::ostream &ost = CoreIO::GetStdErr()) + { + ost << color << msg << TerminalColors::Reset; + } + + void ColoredPrint(const char *color, const String &msg, std::ostream &ost = CoreIO::GetStdErr()) + { + ost << color << msg << TerminalColors::Reset; + } + + std::string MultipleStr(const char *c, size_t n) + { + std::string buf; + 
for (size_t i = 0; i < n; ++i) { buf += c; }
+        return buf;
+    }
+
+    const char *ErrorTypeToString(ErrorType type) // returns the enumerator's name as a C string
+    {
+        switch (type)
+        {
+            case ErrorType::UnusedSymbol: return "UnusedSymbol";
+
+            case ErrorType::MayBeNull: return "MayBeNull";
+
+            case ErrorType::UnterminatedString: return "UnterminatedString";
+            case ErrorType::SyntaxError: return "SyntaxError";
+            default: return "Some one forgot to add case to `ErrorTypeToString`";
+        }
+    }
+
+    void PrintSystemInfos() // prints a two-line version/platform/compiler/build-time banner between two rules to stderr
+    {
+        std::ostream &err = CoreIO::GetStdErr();
+        std::stringstream build_info;
+        build_info << "\r🌘 Fig v" << Core::VERSION << " on " << Core::PLATFORM << ' ' << Core::ARCH << '['
+                   << Core::COMPILER << ']' << '\n'
+                   << " Build Time: " << Core::COMPILE_TIME;
+
+        const std::string &build_info_str = build_info.str();
+        err << MultipleStr("─", build_info_str.size()) << '\n';
+        err << build_info_str << '\n';
+        err << MultipleStr("─", build_info_str.size()) << '\n';
+    }
+
+    void PrintErrorInfo(const Error &error, const SourceManager &srcManager) // prints header, source excerpt with caret, thrower and suggestion to stderr
+    {
+        static constexpr const char *MinorColor = "\033[38;2;138;227;198m";
+        static constexpr const char *MediumColor = "\033[38;2;255;199;95m";
+        static constexpr const char *CriticalColor = "\033[38;2;255;107;107m";
+
+        namespace TC = TerminalColors;
+        std::ostream &err = CoreIO::GetStdErr();
+
+        uint8_t level = ErrorLevel(error.type);
+        // const char *level_name = (level == 1 ? "Minor" : (level == 2 ? "Medium" : "Critical"));
+        const char *level_color = (level == 1 ? MinorColor : (level == 2 ? MediumColor : CriticalColor));
+
+        err << "🔥 "
+            << level_color
+            //<< '(' << level_name << ')'
+            << 'E' << static_cast(error.type) << TC::Reset << ": " << level_color << ErrorTypeToString(error.type)
+            << TC::Reset << '\n';
+
+        const SourceLocation &location = error.location;
+
+        err << TC::DarkGray << " ┌─> " << TC::Cyan << location.fileName << " " << TC::DarkGray << location.sp.line
+            << ":" << location.sp.column << '\n';
+        err << TC::DarkGray << " │" << '\n' << " │" << TC::Reset << '\n';
+
+        // Try to print the 3 lines above and the 2 lines below the error line; the window is clamped towards the error line so both loops always terminate
+
+        int64_t err_line = static_cast<int64_t>(location.sp.line), line_start = err_line - 3, line_end = err_line + 2;
+        while (line_end > err_line && !srcManager.HasLine(line_end)) { --line_end; }
+        while (line_start < err_line && !srcManager.HasLine(line_start)) { ++line_start; }
+
+        const auto &getLineNumWidth = [](size_t l) {
+            unsigned int cnt = 0;
+            while (l != 0)
+            {
+                l = l / 10;
+                cnt++;
+            }
+            return cnt;
+        };
+        unsigned int max_line_number_width = getLineNumWidth(line_end);
+        for (int64_t i = line_start; i <= line_end && srcManager.HasLine(i); ++i) // signed index; stops early instead of reading a missing line
+        {
+            unsigned int offset = 2 + 2 + 1;
+            // ' └─ '
+            if (i == err_line) { err << TC::DarkGray << " └─ " << TC::Reset; }
+            else if (i < err_line) { err << TC::DarkGray << " │ " << TC::Reset; }
+            else
+            {
+                err << MultipleStr(" ", offset);
+            }
+            unsigned int cur_line_number_width = getLineNumWidth(i);
+
+            err << MultipleStr(" ", max_line_number_width - cur_line_number_width) << TC::Yellow << i << TC::Reset;
+            err << " │ " << srcManager.GetLine(i) << '\n';
+            if (i == err_line)
+            {
+                unsigned int error_col_offset = offset + 1 + max_line_number_width + 2;
+                err << MultipleStr(" ", error_col_offset) << MultipleStr(" ", location.sp.column - 1) << TC::LightGreen
+                    << MultipleStr("^", location.sp.tok_length) << TC::Reset << '\n';
+
+                err << MultipleStr(" ", error_col_offset)
+                    << MultipleStr(" ", location.sp.column - 1 + location.sp.tok_length / 2) << "╰─ " << level_color
+                    << error.message << TC::Reset << "\n\n";
+            }
+        }
+        err << "\n";
+        err << "❓ " << TC::DarkGray << "Thrower: " << error.thrower_loc.function_name() << " ("
+            << error.thrower_loc.file_name() << ":" << error.thrower_loc.line() << ")" << TC::Reset << "\n";
+        err << "💡 " << TC::Blue << "Suggestion: " << error.suggestion << TC::Reset;
+    }
+
+    void ReportError(const Error &error, const SourceManager &srcManager) // banner + one error; asserts the source was read and contains the line
+    {
+        assert(srcManager.read && "ReportError: srcManager doesn't read source");
+        assert(srcManager.HasLine(error.location.sp.line));
+
+        PrintSystemInfos();
+        PrintErrorInfo(error, srcManager);
+    }
+
+    void ReportErrors(const std::vector &errors, const SourceManager &srcManager) // banner once, then every error followed by a newline
+    {
+        std::ostream &ost = CoreIO::GetStdErr();
+        PrintSystemInfos();
+        for (const auto &err : errors)
+        {
+            PrintErrorInfo(err, srcManager);
+            ost << '\n';
+        }
+    }
+}; // namespace Fig
\ No newline at end of file
diff --git a/src/Error/Error.hpp b/src/Error/Error.hpp
new file mode 100644
index 0000000..97fa942
--- /dev/null
+++ b/src/Error/Error.hpp
@@ -0,0 +1,138 @@
+#pragma once
+
+#include
+#include
+#include
+
+#include
+
+namespace Fig
+{
+    /*
+        0-1000 Minor
+        1001-2000 Medium
+        2001-3000 Critical
+    */
+    enum class ErrorType : unsigned int
+    {
+        UnusedSymbol = 0,
+
+        MayBeNull = 1001,
+
+        UnterminatedString = 2001,
+        SyntaxError,
+    };
+
+    const char *ErrorTypeToString(ErrorType type);
+
+    struct Error
+    {
+        ErrorType type{}; // value-initialized so a default-constructed Error never holds an indeterminate type
+        String message;
+        String suggestion;
+
+        SourceLocation location;
+        std::source_location thrower_loc;
+
+        Error() {}
+        Error(ErrorType _type,
+              const String &_message,
+              const String &_suggestion,
+              const SourceLocation &_location,
+              const std::source_location &_throwerloc = std::source_location::current()) // defaults to the call site
+        {
+            type = _type;
+            message = _message;
+            suggestion = _suggestion;
+            location = _location;
+            thrower_loc = _throwerloc;
+        }
+    };
+
+    namespace TerminalColors
+    {
+        constexpr const char *Reset = "\033[0m";
+        constexpr const char *Bold = "\033[1m";
+        constexpr const char *Dim = "\033[2m";
+        constexpr const char *Italic = "\033[3m";
+        constexpr const char *Underline = "\033[4m";
+        constexpr const char *Blink = "\033[5m";
+        constexpr const char *Reverse = "\033[7m"; // swap foreground and background colors
+        constexpr const char *Hidden = "\033[8m";  // hidden text
+        constexpr const char *Strike = "\033[9m";  // strikethrough
+
+        constexpr const char *Black = "\033[30m";
+        constexpr const char *Red = "\033[31m";
+        constexpr const char *Green = "\033[32m";
+        constexpr const char *Yellow = "\033[33m";
+        constexpr const char *Blue = "\033[34m";
+        constexpr const char *Magenta = "\033[35m";
+        constexpr const char *Cyan = "\033[36m";
+        constexpr const char *White = "\033[37m";
+
+        constexpr const char *LightBlack = "\033[90m";
+        constexpr const char *LightRed = "\033[91m";
+        constexpr const char *LightGreen = "\033[92m";
+        constexpr const char *LightYellow = "\033[93m";
+        constexpr const char *LightBlue = "\033[94m";
+        constexpr const char *LightMagenta = "\033[95m";
+        constexpr const char *LightCyan = "\033[96m";
+        constexpr const char *LightWhite = "\033[97m";
+
+        constexpr const char *DarkRed = "\033[38;2;128;0;0m";
+        constexpr const char *DarkGreen = "\033[38;2;0;100;0m";
+        constexpr const char *DarkYellow = "\033[38;2;128;128;0m";
+        constexpr const char *DarkBlue = "\033[38;2;0;0;128m";
+        constexpr const char *DarkMagenta = "\033[38;2;100;0;100m";
+        constexpr const char *DarkCyan = "\033[38;2;0;128;128m";
+        constexpr const char *DarkGray = "\033[38;2;64;64;64m";
+        constexpr const char *Gray = "\033[38;2;128;128;128m";
+        constexpr const char *Silver = "\033[38;2;192;192;192m";
+
+        constexpr const char *Navy = "\033[38;2;0;0;128m";
+        constexpr const char *RoyalBlue = "\033[38;2;65;105;225m";
+        constexpr const char *ForestGreen = "\033[38;2;34;139;34m";
+        constexpr const char *Olive = "\033[38;2;128;128;0m";
+        constexpr const char *Teal = "\033[38;2;0;128;128m";
+        constexpr const char *Maroon = "\033[38;2;128;0;0m";
+        constexpr const char *Purple = "\033[38;2;128;0;128m";
+        constexpr const char *Orange = "\033[38;2;255;165;0m";
+        constexpr const char *Gold = "\033[38;2;255;215;0m";
+        constexpr const char *Pink = "\033[38;2;255;192;203m";
+        constexpr const char *Crimson = "\033[38;2;220;20;60m";
+
+        constexpr const char *OnBlack = "\033[40m";
+        constexpr const char *OnRed = "\033[41m";
+        constexpr const char *OnGreen = "\033[42m";
+        constexpr const char *OnYellow = "\033[43m";
+        constexpr const char *OnBlue = "\033[44m";
+        constexpr const char *OnMagenta = "\033[45m";
+        constexpr const char *OnCyan = "\033[46m";
+        constexpr const char *OnWhite = "\033[47m";
+
+        constexpr const char *OnLightBlack = "\033[100m";
+        constexpr const char *OnLightRed = "\033[101m";
+        constexpr const char *OnLightGreen = "\033[102m";
+        constexpr const char *OnLightYellow = "\033[103m";
+        constexpr const char *OnLightBlue = "\033[104m";
+        constexpr const char *OnLightMagenta = "\033[105m";
+        constexpr const char *OnLightCyan = "\033[106m";
+        constexpr const char *OnLightWhite = "\033[107m";
+
+        constexpr const char *OnDarkBlue = "\033[48;2;0;0;128m";
+        constexpr const char *OnGreenYellow = "\033[48;2;173;255;47m";
+        constexpr const char *OnOrange = "\033[48;2;255;165;0m";
+        constexpr const char *OnGray = "\033[48;2;128;128;128m";
+    }; // namespace TerminalColors
+
+    inline uint8_t ErrorLevel(ErrorType t) // maps the numeric id to a level: <=1000 -> 1, 1001..2000 -> 2, >2000 -> 3
+    {
+        unsigned int id = static_cast(t);
+        if (id <= 1000) { return 1; }
+        if (id > 1000 && id <= 2000) { return 2; }
+        if (id > 2000) { return 3; }
+        return 0;
+    }
+
+    void ReportError(const Error &error, const SourceManager &srcManager);
+}; // namespace Fig
\ No newline at end of file
diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp
new file mode 100644
index 0000000..1f26d28
--- /dev/null
+++ b/src/Lexer/Lexer.cpp
@@ -0,0 +1,6 @@
+#include
+
+namespace Fig
+{
+
+};
\ No newline at end of file
diff --git a/src/Lexer/Lexer.hpp b/src/Lexer/Lexer.hpp
new file mode 100644
index 0000000..842f958
--- /dev/null
+++ b/src/Lexer/Lexer.hpp
@@ -0,0 +1,123 @@
+/*!
+    @file src/Lexer/Lexer.hpp
+    @brief Lexer (materialized lexeme)
+    @author PuqiAR (im@puqiar.top)
+    @date 2026-02-13
+*/
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace Fig
+{
+    class SourceReader // cursor over a String of runes that tracks a 1-based line/column position
+    {
+    private:
+        String source;
+        size_t index;
+
+        SourcePosition pos;
+
+    public:
+        SourceReader()
+        {
+            index = 0;
+            pos.line = pos.column = 1; // 1-based, matching SourceManager::HasLine/GetLine and the caret offset math
+        }
+        SourceReader(const String &_source) // copy
+        {
+            source = _source;
+            index = 0;
+            pos.line = pos.column = 1; // 1-based, see default constructor
+        }
+
+        SourcePosition &currentPosition() { return pos; } // mutable reference to the tracked position
+
+        inline char32_t current() const // rune under the cursor; asserts the cursor is in range
+        {
+            assert(index < source.length() && "SourceReader: get current failed, index out of range");
+            return source[index];
+        }
+
+        inline bool hasNext() const { return index < source.length(); } // true while the cursor is on a rune
+
+        inline char32_t peek() const // rune after the cursor; asserts that it exists
+        {
+            assert((index + 1) < source.length() && "SourceReader: get peek failed, index out of range");
+            return source[index + 1];
+        }
+
+        inline char32_t peekIf() const // like peek(), but returns 0xFFFD when there is no following rune
+        {
+            if ((index + 1) < source.length()) { return source[index + 1]; }
+            return 0xFFFD;
+        }
+
+        inline char32_t produce()
+        {
+            // returns current rune, then next
+            char32_t c = current();
+            next();
+            return c;
+        }
+
+        inline void next() // consumes the current rune; the cursor may end up one past the last rune
+        {
+            assert(hasNext() && "SrcReader: next failed, need more runes");
+            const char32_t consumed = current(); // read before ++index so the last rune can be consumed safely
+            ++index;
+            if (consumed == U'\n') // a newline ends its own line; the next rune starts the following one
+            {
+                ++pos.line;
+                pos.column = 1;
+            }
+            else
+            {
+                ++pos.column;
+            }
+        }
+
+        inline size_t currentIndex() const { return index; }
+
+        inline bool isAtEnd() const { return index + 1 >= source.length(); } // on or past the last rune; no underflow for an empty source
+    };
+
+    class Lexer
+    {
+    public:
+        enum State : uint8_t
+        {
+            Normal,
+            Error
+        };
+
+    private:
+        String fileName;
+        SourceReader rd;
+
+    protected:
+        Token scanComments();
+        Token scanIdentifierOrKeyword();
+
+        Token scanNumberLiteral();
+        Token scanStringLiteral();
+        Token scanBoolLiteral();
+        Token scanLiteralNull();
+
+        Token scanPunct();
+    public:
+        State state = Normal;
+
+        Lexer() {}
+        Lexer(const String &source, String _fileName)
+        {
+            rd = SourceReader(source);
+            fileName =
std::move(_fileName);
+        }
+
+        Token NextToken();
+    };
+}; // namespace Fig
\ No newline at end of file
diff --git a/src/SourceManager/SourceManager.hpp b/src/SourceManager/SourceManager.hpp
new file mode 100644
index 0000000..655b164
--- /dev/null
+++ b/src/SourceManager/SourceManager.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+#include
+#include
+
+#include
+
+namespace Fig
+{
+    class SourceManager // loads a source file and serves it whole, by 1-based line, or by substring
+    {
+    private:
+        String filePath;
+        String source;
+        std::vector lines;
+
+    public:
+        bool read = false; // set by Read(): true once the file was opened and loaded
+        String &Read() // loads filePath line by line; returns the accumulated source (unchanged if the open fails)
+        {
+            std::ifstream fs(filePath.toStdString()); // input-only: std::fstream's default in|out mode fails on read-only files
+            if (!fs.is_open())
+            {
+                read = false;
+                return source;
+            }
+            std::string line;
+            while (std::getline(fs, line))
+            {
+                source += line + '\n';
+                lines.push_back(String(line));
+            }
+            read = true;
+            return source;
+        }
+
+        SourceManager() {}
+        SourceManager(String _path) { filePath = std::move(_path); }
+
+        bool HasLine(int64_t _line) const // true when _line is a valid 1-based line number
+        {
+            return _line >= 1 && static_cast<size_t>(_line) <= lines.size(); // sign check first, then an unsigned comparison
+        }
+
+        String GetLine(size_t _line) const // returns a copy of the 1-based line _line
+        {
+            assert(_line >= 1 && _line <= lines.size() && "SourceManager: GetLine failed, index out of range");
+            return lines[_line - 1];
+        }
+
+        String GetSub(size_t _index_start, size_t _length) const // substring of the whole source
+        {
+            return source.substr(_index_start, _length);
+        }
+    };
+};
\ No newline at end of file
diff --git a/src/Token/Token.cpp b/src/Token/Token.cpp
new file mode 100644
index 0000000..210179c
--- /dev/null
+++ b/src/Token/Token.cpp
@@ -0,0 +1,87 @@
+#include
+
+namespace Fig
+{
+    const HashMap Token::symbolMap = {
+        // three-character symbols
+        {String("..."), TokenType::TripleDot},
+        // two-character symbols
+        {String("=="), TokenType::Equal},
+        {String("!="), TokenType::NotEqual},
+        {String("<="), TokenType::LessEqual},
+        {String(">="), TokenType::GreaterEqual},
+        {String("<<"), TokenType::ShiftLeft},
+        {String(">>"), TokenType::ShiftRight},
+        {String("+="), TokenType::PlusEqual},
+        {String("-="), TokenType::MinusEqual},
+        {String("*="), TokenType::AsteriskEqual},
+        {String("/="), TokenType::SlashEqual},
+        {String("%="),
TokenType::PercentEqual},
+        {String("^="), TokenType::CaretEqual},
+        {String("++"), TokenType::DoublePlus},
+        {String("--"), TokenType::DoubleMinus},
+        {String("&&"), TokenType::DoubleAmpersand},
+        {String("||"), TokenType::DoublePipe},
+        {String(":="), TokenType::Walrus},
+        {String("**"), TokenType::Power},
+        {String("->"), TokenType::RightArrow},
+        {String("=>"), TokenType::DoubleArrow},
+
+        // single-character symbols
+        {String("+"), TokenType::Plus},
+        {String("-"), TokenType::Minus},
+        {String("*"), TokenType::Asterisk},
+        {String("/"), TokenType::Slash},
+        {String("%"), TokenType::Percent},
+        {String("^"), TokenType::Caret},
+        {String("&"), TokenType::Ampersand},
+        {String("|"), TokenType::Pipe},
+        {String("~"), TokenType::Tilde},
+        {String("="), TokenType::Assign},
+        {String("<"), TokenType::Less},
+        {String(">"), TokenType::Greater},
+        {String("."), TokenType::Dot},
+        {String(","), TokenType::Comma},
+        {String(":"), TokenType::Colon},
+        {String(";"), TokenType::Semicolon},
+        {String("'"), TokenType::SingleQuote},
+        {String("\""), TokenType::DoubleQuote},
+        {String("("), TokenType::LeftParen},
+        {String(")"), TokenType::RightParen},
+        {String("["), TokenType::LeftBracket},
+        {String("]"), TokenType::RightBracket},
+        {String("{"), TokenType::LeftBrace},
+        {String("}"), TokenType::RightBrace},
+        {String("?"), TokenType::Question},
+        {String("!"), TokenType::Not},
+    };
+
+    const HashMap Token::keywordMap{ // keyword spelling -> token type; every spelling is lowercase
+        {String("and"), TokenType::And},
+        {String("or"), TokenType::Or},
+        {String("not"), TokenType::Not},
+        {String("import"), TokenType::Import},
+        {String("func"), TokenType::Function},
+        {String("var"), TokenType::Variable},
+        {String("const"), TokenType::Const},
+        // {String("final"), TokenType::Final},
+        {String("while"), TokenType::While},
+        {String("for"), TokenType::For},
+        {String("if"), TokenType::If},
+        {String("else"), TokenType::Else},
+        {String("new"), TokenType::New},
+        {String("struct"), TokenType::Struct},
+        {String("interface"), TokenType::Interface},
+        {String("impl"), TokenType::Implement},
+        {String("is"), TokenType::Is},
+        {String("public"), TokenType::Public},
+        {String("return"), TokenType::Return},
+        {String("break"), TokenType::Break},
+        {String("continue"), TokenType::Continue},
+        {String("try"), TokenType::Try},
+        {String("catch"), TokenType::Catch},
+        {String("throw"), TokenType::Throw},
+        {String("finally"), TokenType::Finally}, // was "Finally": the capital F never matched the lowercase keyword
+        {String("as"), TokenType::As},
+    };
+};
\ No newline at end of file
diff --git a/src/Token/Token.hpp b/src/Token/Token.hpp
new file mode 100644
index 0000000..6f94011
--- /dev/null
+++ b/src/Token/Token.hpp
@@ -0,0 +1,145 @@
+#pragma once
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+namespace Fig
+{
+    enum class TokenType : int8_t
+    {
+        Illegal = -1,
+        EndOfFile = 0,
+
+        Comments,
+
+        Identifier,
+
+        /* Keywords */
+        Package,   // package
+        And,       // and
+        Or,        // or
+        Not,       // not
+        Import,    // import
+        Function,  // func
+        Variable,  // var
+        Const,     // const
+        // Final, // final
+        While,     // while
+        For,       // for
+        If,        // if
+        Else,      // else
+        New,       // new
+        Struct,    // struct
+        Interface, // interface
+        Implement, // impl
+        Is,        // is
+        Public,    // public
+        Return,    // return
+        Break,     // break
+        Continue,  // continue
+        Try,       // try
+        Catch,     // catch
+        Throw,     // throw
+        Finally,   // finally
+        As,        // as
+
+        // TypeNull, // Null
+        // TypeInt, // Int
+        // TypeDeps::String, // Deps::String
+        // TypeBool, // Bool
+        // TypeDouble, // Double
+
+        /* Literal Types (not keyword)*/
+        LiteralNumber, // number (int,float...)
+        LiteralString, // string
+        LiteralBool,   // bool (true/false)
+        LiteralNull,   // null (Null unique instance)
+
+        /* Punct */
+        Plus,       // +
+        Minus,      // -
+        Asterisk,   // *
+        Slash,      // /
+        Percent,    // %
+        Caret,      // ^
+        Ampersand,  // &
+        Pipe,       // |
+        Tilde,      // ~
+        ShiftLeft,  // <<
+        ShiftRight, // >>
+        // Exclamation, // !
+        Question,   // ?
+        Assign,     // =
+        Less,       // <
+        Greater,    // >
+        Dot, // .
+        Comma,       // ,
+        Colon,       // :
+        Semicolon,   // ;
+        SingleQuote, // '
+        DoubleQuote, // "
+        // Backtick, // `
+        // At, // @
+        // Hash, // #
+        // Dollar, // $
+        // Backslash, // '\'
+        // Underscore, // _
+        LeftParen,       // (
+        RightParen,      // )
+        LeftBracket,     // [
+        RightBracket,    // ]
+        LeftBrace,       // {
+        RightBrace,      // }
+        // LeftArrow, // <-
+        RightArrow,      // ->
+        DoubleArrow,     // =>
+        Equal,           // ==
+        NotEqual,        // !=
+        LessEqual,       // <=
+        GreaterEqual,    // >=
+        PlusEqual,       // +=
+        MinusEqual,      // -=
+        AsteriskEqual,   // *=
+        SlashEqual,      // /=
+        PercentEqual,    // %=
+        CaretEqual,      // ^=
+        DoublePlus,      // ++
+        DoubleMinus,     // --
+        DoubleAmpersand, // &&
+        DoublePipe,      // ||
+        Walrus,          // :=
+        Power,           // **
+
+        TripleDot, // ... for variadic parameter
+    };
+
+    class Token final // immutable (index, length, type) record describing one lexeme
+    {
+    public:
+        static const HashMap symbolMap;  // operator/punctuation spelling -> TokenType (defined in Token.cpp)
+        static const HashMap keywordMap; // keyword spelling -> TokenType (defined in Token.cpp)
+
+        const size_t index, length;
+        // index: offset into the source file; length: length of the token
+        const TokenType type;
+
+        Token() : index(0), length(0), type(TokenType::Illegal) {}; // default token is Illegal at index 0 with length 0
+        Token(size_t _index, size_t _length, TokenType _type) : index(_index), length(_length), type(_type) {}
+        Deps::String toString() const // debug text: enumerator name, index and length
+        {
+            return Deps::String(std::format("Token'{}' at {}, len {}", magic_enum::enum_name(type), index, length));
+        }
+
+        bool isIdentifier() const { return type == TokenType::Identifier; }
+        bool isLiteral() const // true for the four Literal* token types
+        {
+            return type == TokenType::LiteralNull || type == TokenType::LiteralBool || type == TokenType::LiteralNumber
+                   || type == TokenType::LiteralString;
+        }
+    };
+} // namespace Fig
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..55158e0
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,15 @@
+#include
+#include
+
+int main() // manual check of the error reporter: builds one hard-coded Error and prints it
+{
+    using namespace Fig;
+    Error error{ErrorType::MayBeNull,
+                "unterminated string literal",
+                "terminated it",
+                SourceLocation{2, 4, 5, "main.cpp", "main", "main"}};
+    SourceManager manager = SourceManager("T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/src/main.cpp"); // NOTE(review): absolute, machine-specific path
+    manager.Read();
+
+    ReportError(error, manager);
+}
\ No newline at end of file
diff --git a/xmake.lua b/xmake.lua
index e69de29..fe965a2 100644
--- a/xmake.lua
+++ b/xmake.lua
@@ -0,0 +1,39 @@
+add_rules("mode.debug", "mode.release")
+add_rules("plugin.compile_commands.autoupdate", {outputdir = ".vscode"})
+
+set_policy("run.autobuild", false)
+
+if is_plat("linux") then
+    -- Linux: clang + libc++
+    set_toolchains("clang")
+    add_cxxflags("-stdlib=libc++")
+    add_ldflags("-stdlib=libc++")
+elseif is_plat("windows") then
+    -- 1. CI cross (Linux -> Windows)
+    -- 2. local dev (Windows + llvm-mingw)
+    set_toolchains("mingw") -- llvm-mingw
+    add_ldflags("-Wl,--stack,268435456")
+    -- set_toolchains("clang")
+    -- static lib
+    -- add_ldflags("-target x86_64-w64-mingw32", "-static")
+    -- add_cxxflags("-stdlib=libc++")
+    -- add_ldflags("-stdlib=libc++")
+end
+
+set_languages("c++23")
+add_includedirs("src")
+
+add_defines("__FCORE_COMPILE_TIME=\"" .. os.date("%Y-%m-%d %H:%M:%S") .. "\"") -- bakes the configure-time timestamp into the build as a string macro
+
+target("StringTest")
+    add_files("src/Deps/String/StringTest.cpp")
+
+target("Fig")
+    add_files("src/Core/*.cpp")
+    add_files("src/Token/Token.cpp")
+
+    add_files("src/Error/Error.cpp")
+
+    add_files("src/Lexer/Lexer.cpp")
+
+    add_files("src/main.cpp")
\ No newline at end of file