From 2631f76da11a30283f51d21f9e21dedf293bfca3 Mon Sep 17 00:00:00 2001 From: PuqiAR Date: Fri, 20 Feb 2026 14:05:56 +0800 Subject: [PATCH] feat: Implement compiler and virtual machine for Fig language - Added Compiler class with methods for compiling programs, statements, and expressions. - Introduced Proto structure to hold compiled bytecode and constants. - Implemented expression compilation including literals, identifiers, and infix expressions. - Developed statement compilation for variable declarations and expression statements. - Created a VM class to execute compiled bytecode with support for arithmetic and comparison operations. - Added Object and Value classes for handling different data types and memory management. - Implemented String and Struct objects for enhanced data representation. - Established a parser for parsing variable declarations and statements. - Included tests for the VM and object representations. --- src/Ast/Ast.hpp | 1 + src/Ast/Base.hpp | 45 +++- src/Ast/Operator.hpp | 28 ++- src/Ast/Stmt/ExprStmt.hpp | 35 ++++ src/Ast/Stmt/VarDecl.hpp | 3 +- src/Bytecode/Bytecode.hpp | 56 +++-- src/Compiler/CompileTest.cpp | 58 ++++++ src/Compiler/Compiler.cpp | 28 +++ src/Compiler/Compiler.hpp | 305 ++++++++++++++++++++++++++++ src/Compiler/ExprCompiler.cpp | 215 ++++++++++++++++++++ src/Compiler/StmtCompiler.cpp | 58 ++++++ src/Error/Error.cpp | 5 +- src/Error/Error.hpp | 5 + src/Lexer/Lexer.cpp | 90 +++++--- src/Object/Object.cpp | 13 ++ src/Object/Object.hpp | 12 ++ src/Object/ObjectBase.hpp | 237 +++++++++++++++++++++ src/Object/ObjectTest.cpp | 20 ++ src/Object/String.hpp | 29 +++ src/Object/Struct.hpp | 54 +++++ src/Parser/ExprParser.cpp | 22 +- src/Parser/Parser.cpp | 15 +- src/Parser/Parser.hpp | 67 ++++-- src/Parser/ParserTest.cpp | 9 +- src/Parser/StmtParser.cpp | 82 ++++++++ src/SourceManager/SourceManager.hpp | 16 +- src/VM/VM.cpp | 127 ++++++++++++ src/VM/VM.hpp | 75 +++++++ src/VM/__VMTest.cpp | 5 + src/main.cpp | 67 ++++++ xmake.lua | 
34 +++- 31 files changed, 1722 insertions(+), 94 deletions(-) create mode 100644 src/Ast/Stmt/ExprStmt.hpp create mode 100644 src/Compiler/CompileTest.cpp create mode 100644 src/Compiler/Compiler.cpp create mode 100644 src/Compiler/Compiler.hpp create mode 100644 src/Compiler/ExprCompiler.cpp create mode 100644 src/Compiler/StmtCompiler.cpp create mode 100644 src/Object/Object.cpp create mode 100644 src/Object/Object.hpp create mode 100644 src/Object/ObjectBase.hpp create mode 100644 src/Object/ObjectTest.cpp create mode 100644 src/Object/String.hpp create mode 100644 src/Object/Struct.hpp create mode 100644 src/Parser/StmtParser.cpp create mode 100644 src/VM/VM.cpp create mode 100644 src/VM/VM.hpp create mode 100644 src/VM/__VMTest.cpp diff --git a/src/Ast/Ast.hpp b/src/Ast/Ast.hpp index ead985f..c4ba8f8 100644 --- a/src/Ast/Ast.hpp +++ b/src/Ast/Ast.hpp @@ -14,4 +14,5 @@ #include #include +#include #include \ No newline at end of file diff --git a/src/Ast/Base.hpp b/src/Ast/Base.hpp index 7e5a7c5..acdda67 100644 --- a/src/Ast/Base.hpp +++ b/src/Ast/Base.hpp @@ -16,29 +16,33 @@ namespace Fig enum class AstType : std::uint8_t { AstNode, // 基类 + Program, // 程序 Expr, // 表达式 Stmt, // 语句 /* Expressions */ IdentiExpr, // 标识符表达式 LiteralExpr, // 字面量表达式 - PrefixExpr, // 一元 前缀表达式 - InfixExpr, // 二元 中缀表达式 - - IndexExpr, // 后缀表达式,索引 - CallExpr, // 后缀表达式,函数调用 + PrefixExpr, // 一元 前缀表达式 + InfixExpr, // 二元 中缀表达式 + + IndexExpr, // 后缀表达式,索引 + CallExpr, // 后缀表达式,函数调用 /* Statements */ - VarDecl, + ExprStmt, // 表达式语句,如 println(1) + VarDecl, // 变量声明 }; struct AstNode { - AstType type = AstType::AstNode; + AstType type = AstType::AstNode; SourceLocation location; virtual String toString() const = 0; }; + struct Program; + struct Expr : public AstNode { Expr() @@ -55,6 +59,31 @@ namespace Fig type = AstType::Stmt; } }; + + struct Program final : public AstNode + { + DynArray nodes; + + Program() + { + type = AstType::Program; + } + + Program(DynArray _nodes) + { + type = 
AstType::Program; + nodes = std::move(_nodes); + if (!_nodes.empty()) + { + location = std::move(_nodes.back()->location); + } + } + + virtual String toString() const override + { + return "Program"; + } + }; }; // namespace Fig namespace std @@ -73,4 +102,4 @@ namespace std return std::format_to(ctx.out(), "{}", _node->toString().toStdString()); } }; -}; \ No newline at end of file +}; // namespace std \ No newline at end of file diff --git a/src/Ast/Operator.hpp b/src/Ast/Operator.hpp index 4bb4179..4f36a41 100644 --- a/src/Ast/Operator.hpp +++ b/src/Ast/Operator.hpp @@ -20,6 +20,8 @@ namespace Fig Negate, // 取反 - Not, // 逻辑非 ! / not AddressOf, // 取引用 & + + Count // 哨兵,(int) Count 获得运算符数量(注意,enum必须从 0 开始且不中断) }; enum class BinaryOperator : std::uint8_t { @@ -43,13 +45,13 @@ namespace Fig Power, // 幂运算 ** - Assign, // 赋值(修改) = - AddAssign, // += - SubAssign, // -= + Assign, // 赋值(修改) = + AddAssign, // += + SubAssign, // -= MultiplyAssign, // *= - DivideAssign, // /= - ModuloAssign, // %= - BitXorAssign, // ^= + DivideAssign, // /= + ModuloAssign, // %= + BitXorAssign, // ^= // 位运算 BitAnd, // 按位与 & @@ -60,14 +62,22 @@ namespace Fig // 成员访问 MemberAccess, // . 
+ + Count // 哨兵,(int) Count 获得运算符数量(注意,enum必须从 0 开始且不中断) }; + constexpr unsigned int GetOperatorsSize() + { + // 获取全部运算符的数量 + return static_cast(UnaryOperator::Count) + static_cast(BinaryOperator::Count); + } + using BindingPower = unsigned int; - HashMap &GetUnaryOpMap(); + HashMap &GetUnaryOpMap(); HashMap &GetBinaryOpMap(); - HashMap &GetUnaryOpBindingPowerMap(); + HashMap &GetUnaryOpBindingPowerMap(); HashMap &GetBinaryOpBindingPowerMap(); BindingPower GetUnaryOpRBp(UnaryOperator); @@ -77,6 +87,6 @@ namespace Fig bool IsTokenOp(TokenType type, bool binary = true); - UnaryOperator TokenToUnaryOp(const Token &); + UnaryOperator TokenToUnaryOp(const Token &); BinaryOperator TokenToBinaryOp(const Token &); }; // namespace Fig \ No newline at end of file diff --git a/src/Ast/Stmt/ExprStmt.hpp b/src/Ast/Stmt/ExprStmt.hpp new file mode 100644 index 0000000..9279779 --- /dev/null +++ b/src/Ast/Stmt/ExprStmt.hpp @@ -0,0 +1,35 @@ +/*! + @file src/Ast/Stmt/ExprStmt.hpp + @brief ExprStmt定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include + +namespace Fig +{ + struct ExprStmt final : public Stmt + { + Expr *expr; + + ExprStmt() + { + type = AstType::ExprStmt; + } + + ExprStmt(Expr *_expr) : + expr(_expr) + { + type = AstType::ExprStmt; + location = _expr->location; + } + + virtual String toString() const override + { + return std::format("", expr->toString()); + } + }; +} \ No newline at end of file diff --git a/src/Ast/Stmt/VarDecl.hpp b/src/Ast/Stmt/VarDecl.hpp index a335978..9b34f15 100644 --- a/src/Ast/Stmt/VarDecl.hpp +++ b/src/Ast/Stmt/VarDecl.hpp @@ -23,12 +23,13 @@ namespace Fig type = AstType::VarDecl; } - VarDecl(String _name, Expr *_typeSpecifier, Expr *_initExpr, SourceLocation _location) : + VarDecl(bool _isPublic, String _name, Expr *_typeSpecifier, Expr *_initExpr, SourceLocation _location) : name(std::move(_name)), typeSpecifier(_typeSpecifier), initExpr(_initExpr) // location 指向关键字 var/const位置 { type = AstType::VarDecl; + 
isPublic = _isPublic; location = std::move(_location); } diff --git a/src/Bytecode/Bytecode.hpp b/src/Bytecode/Bytecode.hpp index a71bfe8..cf27d5c 100644 --- a/src/Bytecode/Bytecode.hpp +++ b/src/Bytecode/Bytecode.hpp @@ -2,28 +2,60 @@ @file src/Bytecode/Bytecode.hpp @brief 字节码Bytecode定义 @author PuqiAR (im@puqiar.top) - @date 2026-02-17 + @date 2026-02-18 */ #pragma once #include +#pragma once +#include + namespace Fig { - using OpCodeType = uint8_t; - enum class OpCode : OpCodeType + // 定长 32-bit + using Instruction = std::uint32_t; + + enum class OpCode : std::uint8_t { - LoadConst, // dst, const id - LoadLocal, // dst, slot id - StoreLocal, // slot, src(reg) + Exit, // 结束运行 + LoadK, // iABx 模式: R[A] = Constants[Bx] + Return, // iA 模式: 返回 R[A] 的值 - LoadLocalRef, // dst, slot - LoadRef, // dst, refReg - StoreRef, // refReg, srcReg + Mov, // iABx: R[A] = R[Bx] + Add, // iABC: R[A] = R[B] + R[C] + Sub, // iABC: R[A] = R[B] - R[C] + Mul, // iABC: R[A] = R[B] * R[C] + Div, // iABC: R[A] = R[B] / R[C] + Mod, // iABC: R[A] = R[B] % R[C] + BitXor, // iABC: R[A] = R[B] ^ R[C] - Add, // dst, a, b - Move, // dst, src + Equal, // iABC: R[A] = R[B] == R[C] + NotEqual, // iABC: R[A] = R[B] != R[C] + Greater, // iABC: R[A] = R[B] > R[C] + Less, // iABC: R[A] = R[B] < R[C] + GreaterEqual, // iABC: R[A] = R[B] >= R[C] + LessEqual, // iABC: R[A] = R[B] <= R[C] + + Count, // 哨兵 }; -}; // namespace Fig \ No newline at end of file + + namespace Op + { + // [OpCode: 8] [A: 8] [Bx: 16] + [[nodiscard]] inline constexpr Instruction iABx(OpCode op, std::uint8_t a, std::uint16_t bx) + { + return static_cast(op) | (static_cast(a) << 8) + | (static_cast(bx) << 16); + } + + // [OpCode: 8] [A: 8] [B: 8] [C: 8] + [[nodiscard]] inline constexpr Instruction iABC(OpCode op, std::uint8_t a, std::uint8_t b, std::uint8_t c) + { + return static_cast(op) | (static_cast(a) << 8) + | (static_cast(b) << 16) | (static_cast(c) << 24); + } + } // namespace Op +} // namespace Fig \ No newline at end of file 
diff --git a/src/Compiler/CompileTest.cpp b/src/Compiler/CompileTest.cpp new file mode 100644 index 0000000..3d6155d --- /dev/null +++ b/src/Compiler/CompileTest.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +int main() +{ + using namespace Fig; + + String fileName = "test.fig"; + String filePath = "T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/test.fig"; + + SourceManager manager(filePath); + manager.Read(); + + if (!manager.read) + { + std::cerr << "Couldn't read file"; + return 1; + } + + Lexer lexer(manager.GetSource(), fileName); + Parser parser(lexer, manager, fileName); + + const auto &program_result = parser.Parse(); + if (!program_result) + { + ReportError(program_result.error(), manager); + return 1; + } + Program *program = *program_result; + + Compiler compiler(fileName, manager); + const auto &proto_result = compiler.Compile(program); + if (!proto_result) + { + ReportError(proto_result.error(), manager); + return 1; + } + + Proto *proto = *proto_result; + + std::cout << "=== Constant Pool ===" << '\n'; + for (size_t i = 0; i < proto->constants.size(); ++i) + { + std::print("[{}] {}\n", i, proto->constants[i].ToString()); + } + + DumpCode(proto->code); + + std::cout << "\nMax Stack Size: " << (int) proto->maxStack << std::endl; + return 0; +} \ No newline at end of file diff --git a/src/Compiler/Compiler.cpp b/src/Compiler/Compiler.cpp new file mode 100644 index 0000000..ce0fc2d --- /dev/null +++ b/src/Compiler/Compiler.cpp @@ -0,0 +1,28 @@ +/*! 
+ @file src/Compiler/Compiler.cpp + @brief 编译器实现 + @author PuqiAR (im@puqiar.top) + @date 2026-02-18 +*/ + +#include + +namespace Fig +{ + Result Compiler::Compile(Program *program) + { + current->proto = new Proto(); + current->freeReg = 0; + + for (Stmt *stmt : program->nodes) + { + const auto &result = CompileStmt(static_cast(stmt)); + if (!result) + { + return std::unexpected(result.error()); + } + } + Emit(Op::iABC(OpCode::Exit, 0, 0, 0)); // 一定要退出,这是虚拟机退出信号,否则ub + return current->proto; + } +}; // namespace Fig \ No newline at end of file diff --git a/src/Compiler/Compiler.hpp b/src/Compiler/Compiler.hpp new file mode 100644 index 0000000..376c72f --- /dev/null +++ b/src/Compiler/Compiler.hpp @@ -0,0 +1,305 @@ +/*! + @file src/Compiler/Compiler.hpp + @brief 编译器定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace Fig +{ + // 编译产物 + struct Proto + { + DynArray code; + DynArray constants; + std::uint8_t maxStack = 0; // 函数运行所需寄存器数量 + }; + + struct LocalVar + { + bool isPublic; // 是否向上级/同级其他域公开 + String name; + std::uint8_t reg; // 寄存器(相对 frame base 的寄存器 id) + int depth; // 作用域深度 + }; + + // 任何跨函数、跨模块的编译,都压入弹出这个 State + struct FuncState + { + String name; + FuncState *enclosing = nullptr; // 指向外层状态 (支持闭包) + Proto *proto = nullptr; + + std::uint8_t freeReg = 0; + int scopeDepth = 0; + DynArray locals; + + FuncState(String _name, FuncState *enc = nullptr) : name(std::move(_name)), enclosing(enc) + { + proto = new Proto(); + } + // 注意:这里不 delete proto,因为 proto 是要作为编译产物吐出去的 + }; + + class Compiler + { + private: + String fileName; + + SourceManager &manager; + FuncState *current = nullptr; // 永远指向当前正在编译的上下文 + public: + Compiler(String _fileName, SourceManager &_manager) : fileName(std::move(_fileName)), manager(_manager) + { + // 初始化顶级作用域 + current = new FuncState("global", nullptr); + } + + ~Compiler() + { + // 内存清理 (如果有异常中断) + while (current != 
nullptr) + { + FuncState *prev = current->enclosing; + delete current; + current = prev; + } + } + + Result Compile(Program *program); + + private: + void PushState(String _name) + { + current = new FuncState(std::move(_name)); + } + + Proto *PopState() + { + FuncState *oldState = current; + Proto *finishedProto = oldState->proto; + + current = oldState->enclosing; + delete oldState; + + return finishedProto; + } + + std::uint8_t AllocReg() + { + if (current->freeReg >= 250) + { + assert(false && "Register overflow!"); + } + std::uint8_t reg = current->freeReg++; + if (current->freeReg > current->proto->maxStack) + { + current->proto->maxStack = current->freeReg; + } + return reg; + } + + void FreeReg(std::uint8_t reg) + { + // 如果这个寄存器被局部变量使用,不释放直接 Return + for (const auto &local : current->locals) + { + if (local.reg == reg) + { + return; // 拒绝释放,保护局部变量生命周期 + } + } + + // 如果它是纯粹的临时计算结果),释放 + if (reg == current->freeReg - 1) + { + current->freeReg--; + } + } + void Emit(Instruction inst) + { + current->proto->code.push_back(inst); + } + + std::uint16_t AddConstant(Value v) + { + // TODO: 查重 + current->proto->constants.push_back(v); + return static_cast(current->proto->constants.size() - 1); + } + + void BeginScope() + { + current->scopeDepth++; + } + + void EndScope() + { + current->scopeDepth--; + while (!current->locals.empty() && current->locals.back().depth > current->scopeDepth) + { + FreeReg(current->locals.back().reg); + current->locals.pop_back(); + } + } + + bool HasLocalInCurrentScope(const String &name) + { + // 逆向查重 + for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it) + { + if (it->depth < current->scopeDepth) + break; // 已经超出了当前深度,提前阻断 + if (it->name == name) + return true; + } + return false; + } + + bool HasLocal(const String &name) + { + for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it) + { + if (it->name == name) + { + if (it->depth == current->scopeDepth) + { + return true; // 同级不管 public直接捕获 + 
} + else if (it->isPublic) + { + return true; // 不同级变量 public才能被捕捉 + } + } + } + return false; + } + + std::uint8_t ResolveLocal(const String &name) + { + // 变量遮蔽: 永远先使用同级已有的变量, 所以逆向遍历 + for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it) + { + if (it->name == name) + { + if (it->depth < current->scopeDepth && !it->isPublic) + { + assert(false && "ResolveLocal: Attempt to access a private variable from an outer scope!"); + } + + return it->reg; + } + } + + // 如果在本 Frame 没找到,那就是外层函数的变量 (闭包 Upvalue) 或者全局变量 (Global)。 + assert(false && "ResolveLocal: Variable not found in current frame (Upvalue/Global not implemented yet)!"); + return UINT8_MAX; + } + + std::uint8_t DeclareLocal(bool isPublic, const String &name) + { + std::uint8_t reg = AllocReg(); + current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth}); + return reg; + } + + std::uint8_t DeclareLocal(bool isPublic, const String &name, std::uint8_t reg) + { + current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth}); + return reg; + } + + SourceLocation makeSourceLocation(AstNode *node) + { + SourceLocation location = node->location; // copy + location.functionName = current->name; + location.fileName = fileName; + return location; + } + + Result CompileIdentiExpr(IdentiExpr *); + Result CompileLiteral(LiteralExpr *); + + Result CompileAssignment(InfixExpr *); // 编译赋值,由 CompileInfixExpr调用 + Result CompileInfixExpr(InfixExpr *); + + Result CompileLeftValue(Expr *); // 左值对象,可以是变量、结构体字段或模块对象 + + Result CompileExpr(Expr *); + + Result CompileVarDecl(VarDecl *); + Result CompileStmt(Stmt *); + }; + + inline void DisassembleInstruction(Instruction inst, std::size_t index) + { + // 提取OpCode (低 8 位) + auto op = static_cast(inst & 0xFF); + + std::string_view opName = magic_enum::enum_name(op); + + // 所有指令至少都有 A 操作数 (8~15 位) + std::uint8_t a = (inst >> 8) & 0xFF; + + // 地址补零,指令名左对齐占 10 字符 + std::cout << std::format("{:04d} {:<10} ", index, opName); + + switch 
(op) + { + case OpCode::Mov: { + // iABx 模式 + std::uint16_t bx = (inst >> 16) & 0xFFFF; + std::cout << std::format("R{:<3} R[{}]", a, bx); + break; + } + case OpCode::LoadK: { + // iABx 模式:解析 Bx (16~31 位) + std::uint16_t bx = (inst >> 16) & 0xFFFF; + std::cout << std::format("R{:<3} K[{}]", a, bx); + break; + } + case OpCode::Add: + case OpCode::Sub: + case OpCode::Mul: + case OpCode::Div: + case OpCode::Mod: { + // iABC 模式:解析 B (16~23 位) 和 C (24~31 位) + std::uint8_t b = (inst >> 16) & 0xFF; + std::uint8_t c = (inst >> 24) & 0xFF; + std::cout << std::format("R{:<3} R{:<3} R{}", a, b, c); + break; + } + case OpCode::Return: { + // iA 模式:只用到了 A + std::cout << std::format("R{}", a); + break; + } + default: { + std::cout << "?"; + break; + } + } + std::cout << '\n'; + } + + inline void DumpCode(const DynArray &code) + { + std::cout << "=== Bytecode ===\n"; + for (std::size_t i = 0; i < code.size(); ++i) + { + DisassembleInstruction(code[i], i); + } + } +}; // namespace Fig \ No newline at end of file diff --git a/src/Compiler/ExprCompiler.cpp b/src/Compiler/ExprCompiler.cpp new file mode 100644 index 0000000..191edb5 --- /dev/null +++ b/src/Compiler/ExprCompiler.cpp @@ -0,0 +1,215 @@ +/*! 
+ @file src/Compiler/ExprCompiler.cpp + @brief 编译器实现(表达式部分) + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#include + +namespace Fig +{ + Result Compiler::CompileIdentiExpr(IdentiExpr *ie) + { + if (!HasLocal(ie->name)) + { + return std::unexpected(Error(ErrorType::UseUndeclaredIdentifier, + std::format("`{}` has not been defined", ie->name), + "none", + makeSourceLocation(ie))); + } + return ResolveLocal(ie->name); + } + Result Compiler::CompileLiteral(LiteralExpr *lit) // 编译字面量, 负责转换 token -> Value + { + const Token &token = lit->token; + String lexeme = manager.GetSub(token.index, token.length); + + if (!token.isLiteral()) + { + assert(false && "CompileLiteral: token is not literal"); + } + + Value v; + + if (token.type == TokenType::LiteralNull) + { + v = Value::GetNullInstance(); + } + else if (token.type == TokenType::LiteralTrue) + { + v = Value::GetTrueInstance(); + } + else if (token.type == TokenType::LiteralFalse) + { + v = Value::GetFalseInstance(); + } + else if (token.type == TokenType::LiteralNumber) + { + // TODO: 更换为无异常手写数字解析版本 + if (lexeme.contains(U'.')) + { + // 非整数 + double d = std::stod(lexeme.toStdString()); + v = Value::FromDouble(d); + } + std::int32_t i = std::stoi(lexeme.toStdString()); + v = Value::FromInt(i); + } + + assert("false" && "CompileLiteral: unsupport literal"); + v = Value::GetNullInstance(); + + std::uint8_t targetReg = AllocReg(); + std::uint16_t kIndex = AddConstant(v); + + Emit(Op::iABx(OpCode::LoadK, targetReg, kIndex)); + return targetReg; + } + Result Compiler::CompileAssignment(InfixExpr *infix) // 编译赋值,由 CompileInfixExpr调用 + { + // op必须为 = + const auto &_lhsReg = CompileLeftValue(infix->left); // 必须为左值对象 + if (!_lhsReg) + { + return _lhsReg; + } + std::uint8_t lhsReg = *_lhsReg; + + const auto &_rhsReg = CompileExpr(infix->right); + std::uint8_t rhsReg = *_rhsReg; + + FreeReg(rhsReg); + switch (infix->op) + { + case BinaryOperator::Assign: { + Emit(Op::iABx(OpCode::Mov, lhsReg, rhsReg)); // lhsReg = 
rhsReg + break; + } + case BinaryOperator::AddAssign: { + Emit(Op::iABC(OpCode::Add, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg + rhsReg + break; + } + case BinaryOperator::SubAssign: { + Emit(Op::iABC(OpCode::Sub, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg - rhsReg + break; + } + case BinaryOperator::MultiplyAssign: { + Emit(Op::iABC(OpCode::Mul, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg * rhsReg + break; + } + case BinaryOperator::DivideAssign: { + Emit(Op::iABC(OpCode::Div, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg / rhsReg + break; + } + case BinaryOperator::ModuloAssign: { + Emit(Op::iABC(OpCode::Mod, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg % rhsReg + break; + } + case BinaryOperator::BitXorAssign: { + Emit(Op::iABC(OpCode::BitXor, lhsReg, lhsReg, rhsReg)); // lhsReg = lhsReg ^ rhsReg + break; + } + default: { + assert(false && "CompileAssignment: op unsupported yet"); + } + } + return lhsReg; // 返回赋值的结果,支持连续赋值 + } + Result Compiler::CompileInfixExpr( + InfixExpr *infix) // 编译中缀表达式,返回一个存放结果的寄存器 ID + { + if (infix->op >= BinaryOperator::Assign && infix->op <= BinaryOperator::BitXorAssign) + { + return CompileAssignment(infix); + } + + const auto &_lhsReg = CompileExpr(infix->left); + if (!_lhsReg) + { + return _lhsReg; + } + std::uint8_t lhsReg = *_lhsReg; + const auto &_rhsReg = CompileExpr(infix->right); + if (!_rhsReg) + { + return _rhsReg; + } + std::uint8_t rhsReg = *_rhsReg; + + FreeReg(rhsReg); + FreeReg(lhsReg); + + std::uint8_t resultReg = AllocReg(); + switch (infix->op) + { + case BinaryOperator::Add: { + Emit(Op::iABC(OpCode::Add, resultReg, lhsReg, rhsReg)); + break; + } + + case BinaryOperator::Subtract: { + Emit(Op::iABC(OpCode::Sub, resultReg, lhsReg, rhsReg)); + break; + } + + case BinaryOperator::Multiply: { + Emit(Op::iABC(OpCode::Mul, resultReg, lhsReg, rhsReg)); + break; + } + + case BinaryOperator::Divide: { + Emit(Op::iABC(OpCode::Div, resultReg, lhsReg, rhsReg)); + break; + } + + case BinaryOperator::Modulo: { + 
Emit(Op::iABC(OpCode::Mod, resultReg, lhsReg, rhsReg)); + break; + } + + default: assert(false && "CompileInfixExpr: op unsupported yet"); + } + return resultReg; + } + Result Compiler::CompileLeftValue(Expr *expr) // 左值对象,可以是变量、结构体字段或模块对象 + { + switch (expr->type) + { + case AstType::IdentiExpr: return CompileIdentiExpr(static_cast(expr)); + + default: + return std::unexpected(Error(ErrorType::NotAnLvalue, + std::format("`{}` is not a lvalue, expect a valid lvalue", expr->toString()), + "none", + makeSourceLocation(expr))); + } + } + Result Compiler::CompileExpr(Expr *expr) // 编译表达式,必定返回一个存放结果的寄存器 ID + { + switch (expr->type) + { + case AstType::Stmt: + case AstType::Expr: + case AstType::AstNode: assert(false && "CompileExpr: bad node type"); break; + + case AstType::IdentiExpr: { + return CompileLeftValue(expr); // 左值直接转换成右值 + } + case AstType::LiteralExpr: { + LiteralExpr *lit = static_cast(expr); + + const auto &result = CompileLiteral(lit); + if (!result) + { + return std::unexpected(result.error()); + } + std::uint8_t targetReg = *result; + return targetReg; + } + case AstType::InfixExpr: { + return CompileInfixExpr(static_cast(expr)); + } + } + } +} // namespace Fig \ No newline at end of file diff --git a/src/Compiler/StmtCompiler.cpp b/src/Compiler/StmtCompiler.cpp new file mode 100644 index 0000000..171691f --- /dev/null +++ b/src/Compiler/StmtCompiler.cpp @@ -0,0 +1,58 @@ +/*! 
+ @file src/Compiler/StmtCompiler.cpp + @brief 编译器实现(语句部分) + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#include + +namespace Fig +{ + Result Compiler::CompileVarDecl(VarDecl *varDecl) + { + const String &name = varDecl->name; + if (HasLocalInCurrentScope(name)) + { + return std::unexpected(Error(ErrorType::RedeclarationError, + std::format("variable `{}` has already defined in this scope", name), + "change its name", + makeSourceLocation(varDecl))); + } + std::uint8_t varReg; + if (varDecl->initExpr) + { + const auto &result = CompileExpr(varDecl->initExpr); + if (!result) + { + return std::unexpected(result.error()); + } + std::uint8_t resultReg = *result; + varReg = resultReg; // 复用临时计算结果寄存器 + DeclareLocal(varDecl->isPublic, name, varReg); + } + else + { + varReg = DeclareLocal(varDecl->isPublic, name); + } + return Result(); + } + Result Compiler::CompileStmt(Stmt *stmt) // 编译语句 + { + if (stmt->type == AstType::ExprStmt) + { + ExprStmt *exprStmt = static_cast(stmt); + Expr *expr = exprStmt->expr; + const auto &result = CompileExpr(expr); + if (!result) + { + return std::unexpected(result.error()); + } + } + else if (stmt->type == AstType::VarDecl) + { + return CompileVarDecl(static_cast(stmt)); + } + return Result(); + } +}; // namespace Fig \ No newline at end of file diff --git a/src/Error/Error.cpp b/src/Error/Error.cpp index 3bddba9..dcf6501 100644 --- a/src/Error/Error.cpp +++ b/src/Error/Error.cpp @@ -52,7 +52,10 @@ namespace Fig case ExpectedExpression: return "ExpectedExpression"; case SyntaxError: return "SyntaxError"; - // default: return "Some one forgot to add case to `ErrorTypeToString`"; + case RedeclarationError: return "RedeclarationError"; + case UseUndeclaredIdentifier: return "UseUndeclaredIdentifier"; + case NotAnLvalue: return "NotAnLvalue"; + // default: return "Some one forgot to add case to `ErrorTypeToString`"; } } diff --git a/src/Error/Error.hpp b/src/Error/Error.hpp index 0c855bf..c8c5a5d 100644 --- 
a/src/Error/Error.hpp +++ b/src/Error/Error.hpp @@ -41,6 +41,11 @@ namespace Fig // parser errors ExpectedExpression, SyntaxError, + + // compiler errors + RedeclarationError, + UseUndeclaredIdentifier, + NotAnLvalue }; const char *ErrorTypeToString(ErrorType type); diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp index 1a0154b..1b55a8c 100644 --- a/src/Lexer/Lexer.cpp +++ b/src/Lexer/Lexer.cpp @@ -35,7 +35,7 @@ namespace Fig Result Lexer::scanMultilineComments() { - Token tok(rd.currentIndex(), 2, TokenType::Comments); + Token tok(rd.currentIndex(), 2, TokenType::Comments); SourcePosition startPos = rd.currentPosition(); rd.skip(2); // 跳过 / * while (true) @@ -43,9 +43,9 @@ namespace Fig if (rd.isAtEnd()) { return std::unexpected(Error(ErrorType::UnterminatedComments, - "unterminated multiline comments", - "insert '*/'", - makeSourceLocation(startPos))); + "unterminated multiline comments", + "insert '*/'", + makeSourceLocation(startPos))); } if (rd.current() == U'*' && rd.peekIf() == U'/') { @@ -60,7 +60,7 @@ namespace Fig Result Lexer::scanIdentifierOrKeyword() { - Token tok(rd.currentIndex(), 1, TokenType::Identifier); + Token tok(rd.currentIndex(), 1, TokenType::Identifier); String value; // 用于判断是标识符还是关键字 value.push_back(rd.produce()); // 加入第一个 @@ -107,7 +107,7 @@ namespace Fig // std::format("bad number postfix 0{}", String(_peek)), // "correct it", // makeSourceLocation(rd.currentPosition()))); - + // } } @@ -135,15 +135,60 @@ namespace Fig rd.next(); } while (!rd.isAtEnd()); - // 科学计数法 - while (!rd.isAtEnd() && state == State::ScanDec - && (rd.current() == U'e' || rd.current() == U'E' || rd.current() == U'_' || rd.current() == U'+' - || rd.current() == U'-' || CharUtils::isDigit(rd.current()))) + // 下划线表示法(1_000_000) + while (!rd.isAtEnd() && state == State::ScanDec && (rd.current() == U'_' || CharUtils::isDigit(rd.current()))) { tok.length++; rd.next(); } + // 小数点 + if (rd.currentIf() == U'.') + { + tok.length++; + rd.next(); + + if 
(!CharUtils::isDigit(rd.currentIf())) + { + return std::unexpected(Error(ErrorType::InvalidNumberLiteral, + "need matissa", + "insert matissa", + makeSourceLocation(rd.currentPosition()))); + } + while (!rd.isAtEnd() && CharUtils::isDigit(rd.current())) + { + tok.length++; + rd.next(); + } + } + + // 科学计数法 + if (rd.currentIf() == U'e') + { + tok.length++; + char32_t peek = rd.peekIf(); + if (peek == U'+' || peek == U'-') // ae+b, ae-b + { + tok.length++; + rd.skip(2); // consume `e`, +/- + } + else if (CharUtils::isDigit(peek)) // aeb 情况 + { + rd.next(); // `e` + } + if (!CharUtils::isDigit(rd.currentIf())) + { + return std::unexpected(Error(ErrorType::InvalidNumberLiteral, + "need exponent for scientific notation", + "insert exponent", + makeSourceLocation(rd.currentPosition()))); + } + while (!rd.isAtEnd() && CharUtils::isDigit(rd.current())) + { + tok.length++; + rd.next(); + } + } return tok; } Result Lexer::scanStringLiteral() @@ -152,31 +197,29 @@ namespace Fig SourcePosition startPos = rd.currentPosition(); - Token tok(rd.currentIndex(), 1, TokenType::LiteralString); // " - rd.next(); // skip " / ' + rd.next(); // skip " / ' while (true) { if (state == State::ScanStringDQ && rd.current() == U'"') { - tok.length ++; + tok.length++; rd.next(); // skip '"' break; } else if (state == State::ScanStringSQ && rd.current() == U'\'') { - tok.length ++; + tok.length++; rd.next(); // skip `'` break; } else if (rd.isAtEnd()) { - return std::unexpected( - Error(ErrorType::UnterminatedString, - "unterminated string literal", - std::format("insert '{}'", String((state == State::ScanStringDQ ? "\"" : "'"))), - makeSourceLocation(startPos))); + return std::unexpected(Error(ErrorType::UnterminatedString, + "unterminated string literal", + std::format("insert '{}'", String((state == State::ScanStringDQ ? 
"\"" : "'"))), + makeSourceLocation(startPos))); } else { @@ -220,9 +263,9 @@ namespace Fig if (!Token::punctMap.contains(sym)) { return std::unexpected(Error(ErrorType::InvalidSymbol, - std::format("invalid symbol `{}`", sym), - "correct it", - makeSourceLocation(rd.currentPosition()))); + std::format("invalid symbol `{}`", sym), + "correct it", + makeSourceLocation(rd.currentPosition()))); } tok.type = Token::punctMap.at(sym); return tok; @@ -276,8 +319,7 @@ namespace Fig } else { - return std::unexpected(Error( - ErrorType::InvalidCharacter, + return std::unexpected(Error(ErrorType::InvalidCharacter, std::format("invalid character '{}' (U+{})", String(rd.current()), static_cast(rd.current())), "correct it", makeSourceLocation(rd.currentPosition()))); diff --git a/src/Object/Object.cpp b/src/Object/Object.cpp new file mode 100644 index 0000000..c514f83 --- /dev/null +++ b/src/Object/Object.cpp @@ -0,0 +1,13 @@ +/*! + @file src/Object/Object.hpp + @brief 值表示实现 (NaN Boxing) 和 堆对象函数的实现 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#include + +namespace Fig +{ + +}; // namespace Fig \ No newline at end of file diff --git a/src/Object/Object.hpp b/src/Object/Object.hpp new file mode 100644 index 0000000..cf50ebd --- /dev/null +++ b/src/Object/Object.hpp @@ -0,0 +1,12 @@ +/*! + @file src/Object/Object.hpp + @brief 值系统总文件 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include +#include +#include \ No newline at end of file diff --git a/src/Object/ObjectBase.hpp b/src/Object/ObjectBase.hpp new file mode 100644 index 0000000..51b2d7f --- /dev/null +++ b/src/Object/ObjectBase.hpp @@ -0,0 +1,237 @@ +/*! 
+ @file src/Object/ObjectBase.hpp + @brief Value representation (NaN boxing) in a single uint64 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include +#include + +#include + +namespace Fig +{ + + struct Object; // forward declaration + + /* + A bare `Value = std::uint64_t` alias would normally be faster, + but it would bring implicit-conversion problems. + + So the bits are wrapped in a class instead; not much speed is lost. + (in release builds the compiler optimizes the wrapper away, making it as fast as a raw uint64_t) + */ + class Value + { + private: + std::uint64_t v_; // the only data member; sizeof(Value) is always 8 bytes + + // --- private mask constants --- + static constexpr std::uint64_t QNAN_MASK = 0x7ffc000000000000; + static constexpr std::uint64_t SIGN_BIT = 0x8000000000000000; + + // Canonical quiet NaN: bit 50 is clear, so (v_ & QNAN_MASK) != QNAN_MASK and IsDouble() accepts it + static constexpr std::uint64_t CANONICAL_NAN = 0x7ff8000000000000; + + // high-word tag reserved for Int32 + static constexpr std::uint32_t INT_TAG_HIGH = 0x7FFD0000; + + // tags of the basic primitives + static constexpr std::uint64_t TAG_NULL = 1; + static constexpr std::uint64_t TAG_FALSE = 2; + static constexpr std::uint64_t TAG_TRUE = 3; + + // private raw constructor: only used internally to assemble encodings + constexpr explicit Value(uint64_t raw) : v_(raw) {} + + public: + // Default-constructs Null so that a default-initialized Value is safe to use. + // v_ is set in the initializer list, so it is never read or copied while uninitialized. + constexpr Value() : v_(QNAN_MASK | TAG_NULL) {} + + // Boxes a double. Every bit pattern is stored unchanged except NaNs that collide with the tag space. + [[nodiscard]] static constexpr Value FromDouble(double d) + { + const std::uint64_t raw = std::bit_cast<std::uint64_t>(d); + // A NaN with every QNAN_MASK bit set would alias the tagged (non-double) encodings, + // so collapse it to the canonical quiet NaN. Storing QNAN_MASK itself would produce a + // value for which IsDouble() and every other Is*() test is false. + if ((raw & QNAN_MASK) == QNAN_MASK) + return Value(CANONICAL_NAN); + return Value(raw); + } + + [[nodiscard]] static constexpr Value FromInt(std::int32_t i) + { + // built by shifting the tag into the high word, which rules out sign-extension leaks + return Value((static_cast(INT_TAG_HIGH) << 32) | static_cast(i)); + } + + // NOTE(review): the three Get*Instance() helpers return a non-const reference to a function-local + // static, so a caller could assign through it and change the shared instance - confirm that is intended. + [[nodiscard]] static constexpr Value &GetTrueInstance() + { + static Value trueInstance(QNAN_MASK | TAG_TRUE); + return trueInstance; + } + [[nodiscard]] static constexpr Value &GetFalseInstance() + { + static Value falseInstance(QNAN_MASK | TAG_FALSE); + return falseInstance; + } + + [[nodiscard]] static constexpr Value &FromBool(bool b) + { + return (b ? 
GetTrueInstance() : GetFalseInstance()); + } + + [[nodiscard]] static constexpr Value &GetNullInstance() + { + static Value nullInstance(QNAN_MASK | TAG_NULL); + return nullInstance; + } + + // Boxes a heap object pointer: the pointer bits are OR-ed with SIGN_BIT | QNAN_MASK. + [[nodiscard]] static Value FromObject(Object *ptr) + { + return Value(reinterpret_cast(ptr) | SIGN_BIT | QNAN_MASK); + } + + // type checks (Is) + + [[nodiscard]] constexpr bool IsDouble() const + { + return (v_ & QNAN_MASK) != QNAN_MASK; + } + + [[nodiscard]] constexpr bool IsInt() const + { + // safe test: shift the high 32 bits down and compare them with the tag + return static_cast(v_ >> 32) == INT_TAG_HIGH; + } + + [[nodiscard]] constexpr bool IsNumber() const + { + return IsDouble() || IsInt(); + } + + [[nodiscard]] constexpr bool IsObject() const + { + return (v_ & (SIGN_BIT | QNAN_MASK)) == (SIGN_BIT | QNAN_MASK); + } + + [[nodiscard]] constexpr bool IsNull() const + { + return v_ == (QNAN_MASK | TAG_NULL); + } + + [[nodiscard]] constexpr bool IsBool() const + { + // TAG_FALSE (2) and TAG_TRUE (3) differ only in bit 0, so setting bit 0 maps both onto the True encoding + return (v_ | 1) == (QNAN_MASK | TAG_TRUE); + } + + // data extraction (Unbox / As) + [[nodiscard]] constexpr double AsDouble() const + { + return std::bit_cast(v_); + } + + [[nodiscard]] constexpr int32_t AsInt() const + { + return static_cast(v_); + } + + // core helper: generic numeric extraction. Arithmetic instructions can use this directly instead of hand-written if branches + // calling it on a value that is neither int nor double leads to very nasty problems + [[nodiscard]] constexpr double CastToDouble() const + { + return IsInt() ? 
static_cast(AsInt()) : AsDouble(); + } + + [[nodiscard]] constexpr bool AsBool() const + { + return v_ == (QNAN_MASK | TAG_TRUE); + } + + // Unboxes an object pointer by clearing the SIGN_BIT | QNAN_MASK tag bits. + [[nodiscard]] struct Object *AsObject() const + { + return reinterpret_cast(v_ & ~(SIGN_BIT | QNAN_MASK)); + } + + // operator overloads + + // exposes the raw bits for low-level bit manipulation or hashing + [[nodiscard]] constexpr uint64_t Raw() const + { + return v_; + } + + // keeps the VM's OP_EQ instruction minimal: `if (RA == RB)` + [[nodiscard]] constexpr bool operator==(const Value &other) const + { + // IEEE 754 has the special rule that +0.0 == -0.0, so two doubles are compared as doubles + if (IsDouble() && other.IsDouble()) + { + return AsDouble() == other.AsDouble(); + } + // otherwise compare the 64-bit patterns directly + return v_ == other.v_; + } + + [[nodiscard]] constexpr bool operator!=(const Value &other) const + { + return !(*this == other); + } + + // member functions + + // Renders the value as text; heap objects are not dispatched yet and print as "Object". + [[nodiscard]] + constexpr String ToString() const + { + if (IsNull()) + { + return "null"; + } + else if (IsInt()) + { + return std::to_string(AsInt()); + } + else if (IsDouble()) + { + return std::format("{}", AsDouble()); + } + else if (IsBool()) + { + return (AsBool() ? 
"true" : "false"); + } + else if (IsObject()) + { + return "Object"; // TODO: dispatch + } + else + { + return "Unknow"; + } + } + }; + + /* + C-style inheritance + manual dispatch + no `virtual` anywhere, for maximum efficiency + */ + enum class ObjectType : uint8_t + { + String, + Function, + Struct, + Instance, + }; + + struct Struct /* : public Object */; // definition of the struct base class; forward declaration + + // Total 24 bytes size + struct Object + { + Object *next; // 8 bytes: GC linked list + Struct *klass; // 8 bytes: everything is an object; pointer to the parent class + ObjectType type; // 1 byte : type + bool isMarked = false; // 1 byte : GC mark + // + 6 bytes padding + }; +} // namespace Fig \ No newline at end of file diff --git a/src/Object/ObjectTest.cpp b/src/Object/ObjectTest.cpp new file mode 100644 index 0000000..f98e7cd --- /dev/null +++ b/src/Object/ObjectTest.cpp @@ -0,0 +1,20 @@ +#include +#include +#include +#include +#include + +int main() +{ + using namespace Fig; + + Value null; + Value d = Value::FromDouble(-std::numbers::pi); + Value i = Value::FromInt(-2143242); + Value b = Value::FromBool(false); + + std::cout << null.ToString() << '\n'; + std::cout << d.ToString() << '\n'; + std::cout << i.ToString() << '\n'; + std::cout << b.ToString() << '\n'; +} \ No newline at end of file diff --git a/src/Object/String.hpp b/src/Object/String.hpp new file mode 100644 index 0000000..0db5e65 --- /dev/null +++ b/src/Object/String.hpp @@ -0,0 +1,29 @@ +/*! 
+ @file src/Object/String.hpp + @brief 字符串对象标识 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include + +namespace Fig +{ + /* + // Total 24 bytes size + struct Object + { + Object *next; // 8 bytes: gc链表 + Struct *klass; // 8 bytes: 一切皆对象,父类指针 + ObjectType type; // 1 byte : 类型 + bool isMarked = false; // 1 byte : gc标记 + // + 6 bytes padding + }; + */ + struct StringObject final : public Object + { + String data; // 40 bytes + }; +}; \ No newline at end of file diff --git a/src/Object/Struct.hpp b/src/Object/Struct.hpp new file mode 100644 index 0000000..541c24c --- /dev/null +++ b/src/Object/Struct.hpp @@ -0,0 +1,54 @@ +/*! + @file src/Object/Struct.hpp + @brief 结构体类型 Struct定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include +#include + +namespace Fig +{ + /* + // Total 24 bytes size + struct Object + { + Object *next; // 8 bytes: gc链表 + Struct *klass; // 8 bytes: 一切皆对象,父类指针 + ObjectType type; // 1 byte : 类型 + bool isMarked = false; // 1 byte : gc标记 + // + 6 bytes padding + }; + */ + struct StructObject final : public Object + { + String name; // 元信息(仅供调试/打印/反射) + + // 内存布局信息 + std::uint8_t fieldCount; + Object *operators[GetOperatorsSize()]; + /* + 运算符重载,nullptr代表无重载 + 一般为 NativeFunction / Function + + 排列: + [unary operators ]( binary operators] + 0 - UnaryOperators::Count BinaryOperators::Count + */ + + Object *GetUnaryOperator(UnaryOperator _op) + { + std::uint8_t idx = static_cast(_op); + return operators[idx]; + } + + Object *GetBinaryOperator(BinaryOperator _op) + { + std::uint16_t idx = static_cast(UnaryOperator::Count) + static_cast(_op); + return operators[idx]; + } + }; +}; // namespace Fig \ No newline at end of file diff --git a/src/Parser/ExprParser.cpp b/src/Parser/ExprParser.cpp index 84d5f70..d426c87 100644 --- a/src/Parser/ExprParser.cpp +++ b/src/Parser/ExprParser.cpp @@ -13,7 +13,7 @@ namespace Fig { state = State::ParsingLiteralExpr; const Token &literal_token = consumeToken(); - 
LiteralExpr *node = new LiteralExpr(literal_token, makeSourcelocation(literal_token)); + LiteralExpr *node = new LiteralExpr(literal_token, makeSourceLocation(literal_token)); return node; } Result Parser::parseIdentiExpr() // 当前token为Identifier调用 @@ -21,7 +21,7 @@ namespace Fig state = State::ParsingIdentiExpr; const Token &identifier = consumeToken(); IdentiExpr *node = - new IdentiExpr(srcManager.GetSub(identifier.index, identifier.length), makeSourcelocation(identifier)); + new IdentiExpr(srcManager.GetSub(identifier.index, identifier.length), makeSourceLocation(identifier)); return node; } @@ -75,7 +75,7 @@ namespace Fig if (currentToken().type != TokenType::RightBracket) // `]` { return std::unexpected( - Error(ErrorType::SyntaxError, "unclosed brackets", "insert `]`", makeSourcelocation(lbracket_token))); + Error(ErrorType::SyntaxError, "unclosed brackets", "insert `]`", makeSourceLocation(lbracket_token))); } consumeToken(); // consume `]` @@ -104,7 +104,7 @@ namespace Fig return std::unexpected(Error(ErrorType::SyntaxError, "fn call has unclosed parenthese", "insert `)`", - makeSourcelocation(lparen_token))); + makeSourceLocation(lparen_token))); } const auto &arg_result = parseExpression(); @@ -124,7 +124,7 @@ namespace Fig return std::unexpected(Error(ErrorType::SyntaxError, "expected `,` or `)` in argument list", "insert `,`", - makeSourcelocation(currentToken()))); + makeSourceLocation(currentToken()))); } consumeToken(); // consume `,` @@ -158,7 +158,7 @@ namespace Fig return terminators.contains(token.type); } - Result Parser::parseExpression(BindingPower rbp) + Result Parser::parseExpression(BindingPower rbp, TokenType stop, TokenType stop2) { Expr *lhs = nullptr; Token token = currentToken(); @@ -202,7 +202,7 @@ namespace Fig if (rparen_token.type != TokenType::RightParen) { return std::unexpected(Error( - ErrorType::SyntaxError, "unclosed parenthese", "insert `)`", makeSourcelocation(lparen_token))); + ErrorType::SyntaxError, "unclosed 
parenthese", "insert `)`", makeSourceLocation(lparen_token))); } lhs = *expr_result; } @@ -212,7 +212,7 @@ namespace Fig return std::unexpected(Error(ErrorType::ExpectedExpression, "expected expression", "insert expressions", - makeSourcelocation(prevToken()))); + makeSourceLocation(prevToken()))); } while (true) @@ -222,6 +222,10 @@ namespace Fig { break; } + if (token.type == stop || token.type == stop2) + { + break; + } if (IsTokenOp(token.type /* isBinary = true */)) // 是否为二元运算符 { @@ -266,7 +270,7 @@ namespace Fig return std::unexpected(Error(ErrorType::ExpectedExpression, "expression unexpectedly ended", "insert expressions", - makeSourcelocation(token))); + makeSourceLocation(token))); } } return lhs; diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp index ebe8a85..67e591a 100644 --- a/src/Parser/Parser.cpp +++ b/src/Parser/Parser.cpp @@ -10,9 +10,18 @@ namespace Fig { - DynArray Parser::parseAll() + Result Parser::Parse() { - DynArray nodes; - return nodes; + Program *program = new Program; + while (!isEOF) + { + const auto &result = parseStatement(); + if (!result) + { + return std::unexpected(result.error()); + } + program->nodes.push_back(*result); + } + return program; } }; \ No newline at end of file diff --git a/src/Parser/Parser.hpp b/src/Parser/Parser.hpp index 63fc98c..cd3d0fd 100644 --- a/src/Parser/Parser.hpp +++ b/src/Parser/Parser.hpp @@ -111,6 +111,38 @@ namespace Fig return current; } + inline bool match(TokenType type) + { + if (currentToken().type == type) + { + consumeToken(); + return true; + } + return false; + } + + inline Error makeUnexpectTokenError(const String &stmtType, const String &expect, const Token &tokenGot, std::source_location loc = std::source_location::current()) + { + return Error( + ErrorType::SyntaxError, + std::format("expect '{}' in {}, got `{}`", expect, stmtType, magic_enum::enum_name(tokenGot.type)), + "none", + makeSourceLocation(tokenGot), + loc + ); + } + + inline Error 
makeExpectSemicolonError(std::source_location loc = std::source_location::current()) + { + return Error( + ErrorType::SyntaxError, + "expect ';' after statement", + "insert ';'", + makeSourceLocation(currentToken()), + loc + ); + } + public: enum class State : std::uint8_t { @@ -125,6 +157,8 @@ namespace Fig ParsingIndexExpr, ParsingCallExpr, + ParsingVarDecl, + } state; Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName) : @@ -134,37 +168,36 @@ namespace Fig } private: - SourceLocation makeSourcelocation(const Token &tok) + SourceLocation makeSourceLocation(const Token &tok) { auto [line, column] = srcManager.GetLineColumn(tok.index); - return SourceLocation( - SourcePosition( - line, - column, - tok.length - ), fileName, "[internal parser]", magic_enum::enum_name(state).data()); + return SourceLocation(SourcePosition(line, column, tok.length), + fileName, + "[internal parser]", + magic_enum::enum_name(state).data()); } /* Expressions */ Result parseLiteralExpr(); // 当前token为literal时调用 Result parseIdentiExpr(); // 当前token为Identifier调用 - Result parseInfixExpr(Expr *); // 由 parseExpression递归调用, 当前token为op - Result parsePrefixExpr(); // 由 parseExpression递归调用, 当前token为op + Result parseInfixExpr(Expr *); // 由 parseExpression递归调用, 当前token为op + Result parsePrefixExpr(); // 由 parseExpression递归调用, 当前token为op Result parseIndexExpr(Expr *); // 由 parseExpression调用, 当前token为 `[` - Result parseCallExpr(Expr *); // 由 parseExpression调用, 当前token为 `(` + Result parseCallExpr(Expr *); // 由 parseExpression调用, 当前token为 `(` - std::unordered_set getTerminators(); // 返回固定的终止符 - bool shouldTerminate(); // 判断是否终结 + std::unordered_set getTerminators(); // 返回固定的终止符 + bool shouldTerminate(); // 判断是否终结 - // Result parseExpression(BindingPower = 0); + Result parseExpression(BindingPower = 0, TokenType stop = TokenType::Semicolon, TokenType stop2 = TokenType::Semicolon); /* Statements */ - - public: + Result parseVarDecl(bool); // 由 parseStatement调用, 当前token为 var - Result 
parseExpression(BindingPower = 0); - DynArray parseAll(); + Result parseStatement(); + + public: + Result Parse(); }; }; // namespace Fig \ No newline at end of file diff --git a/src/Parser/ParserTest.cpp b/src/Parser/ParserTest.cpp index 4db3de3..e89e467 100644 --- a/src/Parser/ParserTest.cpp +++ b/src/Parser/ParserTest.cpp @@ -18,12 +18,15 @@ int main() Lexer lexer(source, fileName); Parser parser(lexer, srcManager, fileName); - const auto &result = parser.parseExpression(); + const auto &result = parser.Parse(); if (!result) { ReportError(result.error(), srcManager); return 1; } - Expr *expr = *result; - std::cout << expr->toString() << '\n'; + Program *program = *result; + for (Stmt *stmt : program->nodes) + { + std::cout << stmt->toString() << '\n'; + } } \ No newline at end of file diff --git a/src/Parser/StmtParser.cpp b/src/Parser/StmtParser.cpp new file mode 100644 index 0000000..5ef3c49 --- /dev/null +++ b/src/Parser/StmtParser.cpp @@ -0,0 +1,82 @@ +/*! + @file src/Parser/StmtParser.hpp + @brief 语法分析器(Pratt + 手动递归下降) 语句解析实现 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#include + +namespace Fig +{ + Result Parser::parseVarDecl(bool isPublic) // 由 parseStatement调用, 当前token为 var + { + state = State::ParsingVarDecl; + + SourceLocation location = makeSourceLocation(consumeToken()); // consume `var` + + if (currentToken().type != TokenType::Identifier) + { + return std::unexpected(makeUnexpectTokenError("VarDecl", "var name", currentToken())); + } + const String &name = srcManager.GetSub(currentToken().index, currentToken().length); + consumeToken(); // consume name + + Expr *typeSpeicifer = nullptr; + if (match(TokenType::Colon)) // `:` + { + const auto &result = parseExpression(0, TokenType::Assign); + if (!result) + { + return std::unexpected(result.error()); + } + typeSpeicifer = *result; + } + + Expr *initExpr = nullptr; + if (match(TokenType::Assign)) + { + const auto &result = parseExpression(); + if (!result) + { + return 
std::unexpected(result.error()); + } + initExpr = *result; + } + if (!match(TokenType::Semicolon)) + { + // Propagate the error: building it without returning silently accepted a missing `;` + return std::unexpected(makeExpectSemicolonError()); + } + VarDecl *varDecl = new VarDecl(isPublic, name, typeSpeicifer, initExpr, location); + return varDecl; + } + + /* + Parses one statement starting at the current token: + `public var ...` / `var ...` -> parseVarDecl + anything else -> expression statement that must end with `;` + Returns the error of the first sub-parse that fails. + */ + Result Parser::parseStatement() + { + if (currentToken().type == TokenType::Public) + { + consumeToken(); // consume `public` + if (currentToken().type == TokenType::Variable) + { + return parseVarDecl(true); + } + // `public` must be followed by a declaration. Without this return, control fell off the + // end of a non-void function (undefined behavior). + return std::unexpected(makeUnexpectTokenError("public declaration", "var", currentToken())); + } + else if (currentToken().type == TokenType::Variable) + { + return parseVarDecl(false); + } + else + { + const auto &expr_result = parseExpression(0); + if (!expr_result) + { + return std::unexpected(expr_result.error()); + } + ExprStmt *exprStmt = new ExprStmt(*expr_result); + if (!match(TokenType::Semicolon)) + { + return std::unexpected(makeExpectSemicolonError()); + } + return exprStmt; + } + } +}; // namespace Fig \ No newline at end of file diff --git a/src/SourceManager/SourceManager.hpp b/src/SourceManager/SourceManager.hpp index 3a44e7f..150767c 100644 --- a/src/SourceManager/SourceManager.hpp +++ b/src/SourceManager/SourceManager.hpp @@ -7,8 +7,9 @@ #pragma once -#include #include +#include + #include @@ -17,8 +18,8 @@ namespace Fig class SourceManager { private: - String filePath; - String source; + String filePath; + String source; std::vector lines; std::vector lineStartIndex; // 每行在整个源字符串中的起始 index @@ -45,7 +46,7 @@ namespace Fig } public: - bool read = false; + bool read = false; String &Read() { std::fstream fs(filePath.toStdString()); @@ -73,7 +74,10 @@ namespace Fig } SourceManager() {} - SourceManager(String _path) { filePath = std::move(_path); } + SourceManager(String _path) + { + filePath = std::move(_path); + } bool HasLine(int64_t _line) const { @@ -127,4 +131,4 @@ namespace Fig return {line + 1, column}; } }; -}; \ No newline at end of file +}; // namespace Fig \ No newline at end of file diff --git a/src/VM/VM.cpp b/src/VM/VM.cpp new file mode 100644 index 0000000..7fd25cf --- 
/dev/null +++ b/src/VM/VM.cpp @@ -0,0 +1,127 @@ +/*! + @file src/VM/VM.cpp + @brief Implementation of the VM's core execution engine + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#include + +// Expands to the `case` for one arithmetic opcode: reads registers B and C and writes register A. +// Int op Int is stored with FromInt; every combination involving a Double is stored with FromDouble. +// NOTE(review): the Int/Int branch applies `op` to two int32 values directly, so when instantiated with `/` +// a zero divisor is undefined behavior, and + - * can overflow - confirm how these cases should be reported. +// NOTE(review): unsupported operand types only hit an assert; if assertions are compiled out, register A is left unchanged. +#define BINARY_ARITHMETIC_OP(opCode, op) \ + case OpCode::opCode: { \ + std::uint8_t b = decodeB(inst); \ + std::uint8_t c = decodeC(inst); \ + Value lhs = registers[b]; \ + Value rhs = registers[c]; \ + if (lhs.IsInt() && rhs.IsInt()) [[likely]] \ + { \ + registers[a] = Value::FromInt(lhs.AsInt() op rhs.AsInt()); \ + } \ + else if (lhs.IsDouble() && rhs.IsDouble()) [[likely]] \ + { \ + registers[a] = Value::FromDouble(lhs.AsDouble() op rhs.AsDouble()); \ + } \ + /* implicit promotion: mixed Int / Double arithmetic */ \ + else if (lhs.IsInt() && rhs.IsDouble()) [[likely]] \ + { \ + registers[a] = Value::FromDouble(lhs.AsInt() op rhs.AsDouble()); \ + } \ + else if (lhs.IsDouble() && rhs.IsInt()) [[likely]] \ + { \ + registers[a] = Value::FromDouble(lhs.AsDouble() op rhs.AsInt()); \ + } \ + else \ + { \ + assert(false && "VM Runtime Error: Unsupported types for arithmetic operation"); \ + } \ + break; \ + } + +// Expands to the `case` for one comparison opcode: compares registers B and C as numbers and stores +// the True or False instance in register A. Non-numeric operands only hit an assert. +#define BINARY_COMPARE_OP(opCode, op) \ + case OpCode::opCode: { \ + std::uint8_t b = decodeB(inst); \ + std::uint8_t c = decodeC(inst); \ + Value lhs = registers[b]; \ + Value rhs = registers[c]; \ + if (lhs.IsInt() && rhs.IsInt()) [[likely]] \ + { \ + registers[a] = (lhs.AsInt() op rhs.AsInt()) ? Value::GetTrueInstance() : Value::GetFalseInstance(); \ + } \ + else if (lhs.IsDouble() && rhs.IsDouble()) [[likely]] \ + { \ + registers[a] = (lhs.AsDouble() op rhs.AsDouble()) ? Value::GetTrueInstance() : Value::GetFalseInstance(); \ + } \ + else if (lhs.IsInt() && rhs.IsDouble()) [[likely]] \ + { \ + registers[a] = (lhs.AsInt() op rhs.AsDouble()) ? Value::GetTrueInstance() : Value::GetFalseInstance(); \ + } \ + else if (lhs.IsDouble() && rhs.IsInt()) [[likely]] \ + { \ + registers[a] = (lhs.AsDouble() op rhs.AsInt()) ? 
Value::GetTrueInstance() : Value::GetFalseInstance(); \ + } \ + else \ + { \ + /* TODO: comparison of non-numeric values */ \ + assert(false && "VM Runtime Error: Unsupported types for comparison"); \ + } \ + break; \ + } + +namespace Fig +{ + // Runs the bytecode of `proto` until an Exit or Return instruction is executed. + // Exit yields the Null instance; Return yields the contents of register A. + Result VM::Execute(Proto *proto) + { + // instruction pointer (IP / PC) and constant-pool pointer + const Instruction *ip = proto->code.data(); + const Value *k = proto->constants.data(); + + // core interpreter loop (the dispatch loop) + // NOTE(review): nothing here checks `ip` against the end of proto->code; presumably the compiler always emits a final Exit/Return - confirm. + while (true) + { + // fetch the instruction and advance the pointer + Instruction inst = *ip++; + + // decode the OpCode and the A operand + OpCode op = decodeOpCode(inst); + std::uint8_t a = decodeA(inst); + switch (op) + { + case OpCode::Exit: { + return Value::GetNullInstance(); + } + case OpCode::LoadK: { + std::uint16_t bx = decodeBx(inst); + registers[a] = k[bx]; // constants + break; + } + + // NOTE(review): bx is a 16-bit operand but VM.hpp declares 1024 registers and no bounds check is done here - confirm the compiler never emits a larger index. + case OpCode::Mov: { + std::uint16_t bx = decodeBx(inst); + registers[a] = registers[bx]; + break; + } + + BINARY_ARITHMETIC_OP(Add, +); + BINARY_ARITHMETIC_OP(Sub, -); + BINARY_ARITHMETIC_OP(Mul, *); + BINARY_ARITHMETIC_OP(Div, /); + + BINARY_COMPARE_OP(Equal, ==); + BINARY_COMPARE_OP(NotEqual, !=); + BINARY_COMPARE_OP(Greater, >); + BINARY_COMPARE_OP(Less, <); + BINARY_COMPARE_OP(GreaterEqual, >=); + BINARY_COMPARE_OP(LessEqual, <=); + + case OpCode::Return: { + return registers[a]; + } + + // NOTE(review): if assertions are compiled out, an unknown opcode leaves the switch and the loop moves on to the next instruction. + default: { + assert(false && "VM: Unknown OpCode encountered!"); + } + } + } + // not reached: the loop above only exits through a return + return Value::GetNullInstance(); + } +}; // namespace Fig \ No newline at end of file diff --git a/src/VM/VM.hpp b/src/VM/VM.hpp new file mode 100644 index 0000000..abb1485 --- /dev/null +++ b/src/VM/VM.hpp @@ -0,0 +1,75 @@ +/*! 
+ @file src/VM/VM.hpp + @brief 虚拟机核心执行引擎 + @author PuqiAR (im@puqiar.top) + @date 2026-02-19 +*/ + +#pragma once + +#include +#include +#include + +#include // debug +#include + +namespace Fig +{ + class VM + { + private: + static constexpr unsigned int MAX_REGISTERS = 1024; + + // 一次性分配 + Value registers[MAX_REGISTERS]; + + public: + VM() + { + for (unsigned int i = 0; i < MAX_REGISTERS; ++i) + { + registers[i] = Value::GetNullInstance(); + } + } + + private: + inline OpCode decodeOpCode(Instruction inst) + { + return static_cast(inst & 0xFF); + } + inline std::uint8_t decodeA(Instruction inst) + { + return (inst >> 8) & 0xFF; + } + inline std::uint16_t decodeBx(Instruction inst) + { + return (inst >> 16) & 0xFFFF; + } + inline std::uint8_t decodeB(Instruction inst) + { + return (inst >> 16) & 0xFF; + } + inline std::uint8_t decodeC(Instruction inst) + { + return (inst >> 24) & 0xFF; + } + + public: + // 执行入口:接收 Proto + Result Execute(Proto *proto); + + inline void PrintRegisters() + { + std::cout << "=== Registers ===" << '\n'; + for (unsigned int i = 0; i < MAX_REGISTERS; ++i) + { + Value &v = registers[i]; + if (!v.IsNull()) + { + std::println("[{}] {}", i, v.ToString()); + } + } + } + }; +} // namespace Fig \ No newline at end of file diff --git a/src/VM/__VMTest.cpp b/src/VM/__VMTest.cpp new file mode 100644 index 0000000..fdb8aaf --- /dev/null +++ b/src/VM/__VMTest.cpp @@ -0,0 +1,5 @@ +/* + 哈哈! + VM的测试? + 就是main.cpp啦!全流程执行! 
+*/ \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index b08a07e..eb8b481 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,4 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + int main() { + using namespace Fig; + + String fileName = "test.fig"; + String filePath = "T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/test.fig"; + + SourceManager manager(filePath); + manager.Read(); + + if (!manager.read) + { + std::cerr << "Couldn't read file"; + return 1; + } + + Lexer lexer(manager.GetSource(), fileName); + Parser parser(lexer, manager, fileName); + + const auto &program_result = parser.Parse(); + if (!program_result) + { + ReportError(program_result.error(), manager); + return 1; + } + Program *program = *program_result; + + Compiler compiler(fileName, manager); + const auto &proto_result = compiler.Compile(program); + if (!proto_result) + { + ReportError(proto_result.error(), manager); + return 1; + } + + Proto *proto = *proto_result; + + std::cout << "=== Constant Pool ===" << '\n'; + for (size_t i = 0; i < proto->constants.size(); ++i) + { + std::print("[{}] {}\n", i, proto->constants[i].ToString()); + } + + DumpCode(proto->code); + + std::cout << "\nMax Stack Size: " << (int) proto->maxStack << std::endl; + VM vm; + + const auto &result_ = vm.Execute(proto); + if (!result_) + { + ReportError(result_.error(), manager); + return 1; + } + Value result = *result_; + std::cout << "result: " << result.ToString() << "\n"; + vm.PrintRegisters(); } \ No newline at end of file diff --git a/xmake.lua b/xmake.lua index b1c40b9..59b6ce4 100644 --- a/xmake.lua +++ b/xmake.lua @@ -44,10 +44,34 @@ target("ParserTest") add_files("src/Ast/Operator.cpp") add_files("src/Parser/ExprParser.cpp") + add_files("src/Parser/StmtParser.cpp") add_files("src/Parser/Parser.cpp") add_files("src/Parser/ParserTest.cpp") +target("ObjectTest") + add_files("src/Object/Object.cpp") + 
add_files("src/Object/ObjectTest.cpp") + +target("CompilerTest") + add_files("src/Core/*.cpp") + add_files("src/Token/Token.cpp") + add_files("src/Error/Error.cpp") + add_files("src/Lexer/Lexer.cpp") + + add_files("src/Ast/Operator.cpp") + add_files("src/Parser/ExprParser.cpp") + add_files("src/Parser/StmtParser.cpp") + add_files("src/Parser/Parser.cpp") + + add_files("src/Object/Object.cpp") + + add_files("src/Compiler/ExprCompiler.cpp") + add_files("src/Compiler/StmtCompiler.cpp") + add_files("src/Compiler/Compiler.cpp") + + add_files("src/Compiler/CompileTest.cpp") + target("Fig") add_files("src/Core/*.cpp") add_files("src/Token/Token.cpp") @@ -56,6 +80,14 @@ target("Fig") add_files("src/Ast/Operator.cpp") add_files("src/Parser/ExprParser.cpp") + add_files("src/Parser/StmtParser.cpp") add_files("src/Parser/Parser.cpp") - + + add_files("src/Object/Object.cpp") + + add_files("src/Compiler/ExprCompiler.cpp") + add_files("src/Compiler/StmtCompiler.cpp") + add_files("src/Compiler/Compiler.cpp") + + add_files("src/VM/VM.cpp") add_files("src/main.cpp") \ No newline at end of file