feat: Implement compiler and virtual machine for Fig language

- Added Compiler class with methods for compiling programs, statements, and expressions.
- Introduced Proto structure to hold compiled bytecode and constants.
- Implemented expression compilation including literals, identifiers, and infix expressions.
- Developed statement compilation for variable declarations and expression statements.
- Created a VM class to execute compiled bytecode with support for arithmetic and comparison operations.
- Added Object and Value classes for handling different data types and memory management.
- Implemented String and Struct objects for enhanced data representation.
- Established a parser for parsing variable declarations and statements.
- Included tests for the VM and object representations.
This commit is contained in:
2026-02-20 14:05:56 +08:00
parent f2e899c7a7
commit 2631f76da1
31 changed files with 1722 additions and 94 deletions

305
src/Compiler/Compiler.hpp Normal file
View File

@@ -0,0 +1,305 @@
/*!
@file src/Compiler/Compiler.hpp
@brief 编译器定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-19
*/
#pragma once
#include <Ast/Ast.hpp>
#include <Bytecode/Bytecode.hpp>
#include <Deps/Deps.hpp>
#include <Error/Error.hpp>
#include <Object/Object.hpp>
#include <SourceManager/SourceManager.hpp>
#include <cassert>
#include <iostream>
namespace Fig
{
// Compilation output: the bytecode and constant pool produced for one function/chunk.
struct Proto
{
// Emitted instructions, in emission order.
DynArray<Instruction> code;
// Constant pool; constants are referred to by their index in this array.
DynArray<Value> constants;
std::uint8_t maxStack = 0; // number of registers the function needs at run time
};
// Book-keeping record for one local variable known to the compiler.
// Members are filled positionally via aggregate initialization, so their order matters.
struct LocalVar
{
bool isPublic; // whether the variable is exposed to outer / sibling scopes
String name;
std::uint8_t reg; // register (register id relative to the frame base)
int depth; // scope depth at which the variable was declared
};
// Per-function compilation context. Every compilation that crosses a function or
// module boundary pushes one of these and pops it again when done.
struct FuncState
{
    String name;
    FuncState *enclosing = nullptr; // state of the surrounding function (needed for closures)
    Proto *proto = nullptr;         // output being built for this function
    std::uint8_t freeReg = 0;       // next register to hand out
    int scopeDepth = 0;             // current lexical nesting depth
    DynArray<LocalVar> locals;      // locals declared so far, in declaration order

    // Creates the state together with a fresh, empty Proto.
    FuncState(String _name, FuncState *enc = nullptr) :
        name(std::move(_name)), enclosing(enc), proto(new Proto())
    {
    }

    // Deliberately no destructor: the Proto is handed out as the compilation
    // result, so it must not be deleted together with the state.
};
class Compiler
{
private:
String fileName;
SourceManager &manager;
FuncState *current = nullptr; // 永远指向当前正在编译的上下文
public:
Compiler(String _fileName, SourceManager &_manager) : fileName(std::move(_fileName)), manager(_manager)
{
// 初始化顶级作用域
current = new FuncState("global", nullptr);
}
~Compiler()
{
// 内存清理 (如果有异常中断)
while (current != nullptr)
{
FuncState *prev = current->enclosing;
delete current;
current = prev;
}
}
Result<Proto *, Error> Compile(Program *program);
private:
void PushState(String _name)
{
current = new FuncState(std::move(_name));
}
Proto *PopState()
{
FuncState *oldState = current;
Proto *finishedProto = oldState->proto;
current = oldState->enclosing;
delete oldState;
return finishedProto;
}
std::uint8_t AllocReg()
{
if (current->freeReg >= 250)
{
assert(false && "Register overflow!");
}
std::uint8_t reg = current->freeReg++;
if (current->freeReg > current->proto->maxStack)
{
current->proto->maxStack = current->freeReg;
}
return reg;
}
void FreeReg(std::uint8_t reg)
{
// 如果这个寄存器被局部变量使用,不释放直接 Return
for (const auto &local : current->locals)
{
if (local.reg == reg)
{
return; // 拒绝释放,保护局部变量生命周期
}
}
// 如果它是纯粹的临时计算结果),释放
if (reg == current->freeReg - 1)
{
current->freeReg--;
}
}
void Emit(Instruction inst)
{
current->proto->code.push_back(inst);
}
std::uint16_t AddConstant(Value v)
{
// TODO: 查重
current->proto->constants.push_back(v);
return static_cast<std::uint16_t>(current->proto->constants.size() - 1);
}
void BeginScope()
{
current->scopeDepth++;
}
void EndScope()
{
current->scopeDepth--;
while (!current->locals.empty() && current->locals.back().depth > current->scopeDepth)
{
FreeReg(current->locals.back().reg);
current->locals.pop_back();
}
}
bool HasLocalInCurrentScope(const String &name)
{
// 逆向查重
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->depth < current->scopeDepth)
break; // 已经超出了当前深度,提前阻断
if (it->name == name)
return true;
}
return false;
}
bool HasLocal(const String &name)
{
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->name == name)
{
if (it->depth == current->scopeDepth)
{
return true; // 同级不管 public直接捕获
}
else if (it->isPublic)
{
return true; // 不同级变量 public才能被捕捉
}
}
}
return false;
}
std::uint8_t ResolveLocal(const String &name)
{
// 变量遮蔽: 永远先使用同级已有的变量, 所以逆向遍历
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->name == name)
{
if (it->depth < current->scopeDepth && !it->isPublic)
{
assert(false && "ResolveLocal: Attempt to access a private variable from an outer scope!");
}
return it->reg;
}
}
// 如果在本 Frame 没找到,那就是外层函数的变量 (闭包 Upvalue) 或者全局变量 (Global)。
assert(false && "ResolveLocal: Variable not found in current frame (Upvalue/Global not implemented yet)!");
return UINT8_MAX;
}
std::uint8_t DeclareLocal(bool isPublic, const String &name)
{
std::uint8_t reg = AllocReg();
current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth});
return reg;
}
std::uint8_t DeclareLocal(bool isPublic, const String &name, std::uint8_t reg)
{
current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth});
return reg;
}
SourceLocation makeSourceLocation(AstNode *node)
{
SourceLocation location = node->location; // copy
location.functionName = current->name;
location.fileName = fileName;
return location;
}
Result<std::uint8_t, Error> CompileIdentiExpr(IdentiExpr *);
Result<std::uint8_t, Error> CompileLiteral(LiteralExpr *);
Result<std::uint8_t, Error> CompileAssignment(InfixExpr *); // 编译赋值,由 CompileInfixExpr调用
Result<std::uint8_t, Error> CompileInfixExpr(InfixExpr *);
Result<std::uint8_t, Error> CompileLeftValue(Expr *); // 左值对象,可以是变量、结构体字段或模块对象
Result<std::uint8_t, Error> CompileExpr(Expr *);
Result<void, Error> CompileVarDecl(VarDecl *);
Result<void, Error> CompileStmt(Stmt *);
};
// Prints one instruction in human-readable form to std::cout.
//
// Field layout decoded here: opcode in bits 0-7, A in bits 8-15, then either
// Bx in bits 16-31 (iABx) or B in bits 16-23 plus C in bits 24-31 (iABC).
// `index` is the instruction's position, printed zero-padded in front.
inline void DisassembleInstruction(Instruction inst, std::size_t index)
{
    const auto opcode = static_cast<OpCode>(inst & 0xFF);
    const auto regA = static_cast<std::uint8_t>((inst >> 8) & 0xFF);

    // Address zero-padded to 4 digits, mnemonic left-aligned in 10 columns.
    std::cout << std::format("{:04d} {:<10} ", index, magic_enum::enum_name(opcode));

    switch (opcode)
    {
        case OpCode::Mov:
        case OpCode::LoadK: {
            // iABx: second operand is the 16-bit Bx field.
            const auto wide = static_cast<std::uint16_t>((inst >> 16) & 0xFFFF);
            if (opcode == OpCode::Mov)
            {
                std::cout << std::format("R{:<3} R[{}]", regA, wide);
            }
            else
            {
                std::cout << std::format("R{:<3} K[{}]", regA, wide);
            }
            break;
        }
        case OpCode::Add:
        case OpCode::Sub:
        case OpCode::Mul:
        case OpCode::Div:
        case OpCode::Mod: {
            // iABC: two 8-bit operands B and C.
            const auto regB = static_cast<std::uint8_t>((inst >> 16) & 0xFF);
            const auto regC = static_cast<std::uint8_t>((inst >> 24) & 0xFF);
            std::cout << std::format("R{:<3} R{:<3} R{}", regA, regB, regC);
            break;
        }
        case OpCode::Return:
            // iA: only the A operand is used.
            std::cout << std::format("R{}", regA);
            break;
        default:
            // Opcode without a dedicated printer.
            std::cout << "?";
            break;
    }

    std::cout << '\n';
}
// Prints a header line followed by the disassembly of every instruction in `code`,
// numbered from 0.
inline void DumpCode(const DynArray<Instruction> &code)
{
    std::cout << "=== Bytecode ===\n";

    std::size_t position = 0;
    for (const auto &instruction : code)
    {
        DisassembleInstruction(instruction, position);
        ++position;
    }
}
}; // namespace Fig