Files
Fig/src/Compiler/Compiler.hpp
PuqiAR 2631f76da1 feat: Implement compiler and virtual machine for Fig language
- Added Compiler class with methods for compiling programs, statements, and expressions.
- Introduced Proto structure to hold compiled bytecode and constants.
- Implemented expression compilation including literals, identifiers, and infix expressions.
- Developed statement compilation for variable declarations and expression statements.
- Created a VM class to execute compiled bytecode with support for arithmetic and comparison operations.
- Added Object and Value classes for handling different data types and memory management.
- Implemented String and Struct objects for enhanced data representation.
- Established a parser for parsing variable declarations and statements.
- Included tests for the VM and object representations.
2026-02-20 14:05:56 +08:00

305 lines
9.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*!
@file src/Compiler/Compiler.hpp
@brief 编译器定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-19
*/
#pragma once
#include <Ast/Ast.hpp>
#include <Bytecode/Bytecode.hpp>
#include <Deps/Deps.hpp>
#include <Error/Error.hpp>
#include <Object/Object.hpp>
#include <SourceManager/SourceManager.hpp>
#include <cassert>
#include <iostream>
namespace Fig
{
/// Compilation product: the bytecode and constants emitted for one unit.
struct Proto
{
    /// Instructions, in the order they were emitted.
    DynArray<Instruction> code;

    /// Constant pool belonging to this unit.
    DynArray<Value> constants;

    /// Number of registers the function needs at run time.
    std::uint8_t maxStack{0};
};
/// One named local variable tracked while compiling a function.
///
/// Remains an aggregate, so `LocalVar{isPublic, name, reg, depth}` keeps
/// working; the default member initializers only guarantee that a
/// default-constructed LocalVar never carries indeterminate values.
struct LocalVar
{
    bool isPublic{false}; // whether the variable is exposed to parent / sibling scopes
    String name;          // identifier as written in the source
    std::uint8_t reg{0};  // register id, relative to the frame base
    int depth{0};         // scope depth at which the variable was declared
};
/// Per-unit compilation state. Every compilation that crosses a function or
/// module boundary pushes one of these and pops it again when done.
struct FuncState
{
    String name;
    FuncState *enclosing{nullptr}; // outer state (kept so closures can be supported)
    Proto *proto{nullptr};         // product being filled in for this unit
    std::uint8_t freeReg{0};       // next register id to hand out
    int scopeDepth{0};             // current lexical nesting depth
    DynArray<LocalVar> locals;     // locals declared so far, innermost last

    /// Creates the state for unit `_name` nested inside `enc` (may be null)
    /// and allocates a fresh, empty Proto for it.
    FuncState(String _name, FuncState *enc = nullptr)
        : name(std::move(_name)), enclosing(enc), proto(new Proto())
    {
    }

    // Deliberately no destructor: `proto` is not deleted here because it is
    // handed out as the compilation product.
};
class Compiler
{
private:
String fileName;
SourceManager &manager;
FuncState *current = nullptr; // 永远指向当前正在编译的上下文
public:
Compiler(String _fileName, SourceManager &_manager) : fileName(std::move(_fileName)), manager(_manager)
{
// 初始化顶级作用域
current = new FuncState("global", nullptr);
}
~Compiler()
{
// 内存清理 (如果有异常中断)
while (current != nullptr)
{
FuncState *prev = current->enclosing;
delete current;
current = prev;
}
}
Result<Proto *, Error> Compile(Program *program);
private:
void PushState(String _name)
{
current = new FuncState(std::move(_name));
}
Proto *PopState()
{
FuncState *oldState = current;
Proto *finishedProto = oldState->proto;
current = oldState->enclosing;
delete oldState;
return finishedProto;
}
std::uint8_t AllocReg()
{
if (current->freeReg >= 250)
{
assert(false && "Register overflow!");
}
std::uint8_t reg = current->freeReg++;
if (current->freeReg > current->proto->maxStack)
{
current->proto->maxStack = current->freeReg;
}
return reg;
}
void FreeReg(std::uint8_t reg)
{
// 如果这个寄存器被局部变量使用,不释放直接 Return
for (const auto &local : current->locals)
{
if (local.reg == reg)
{
return; // 拒绝释放,保护局部变量生命周期
}
}
// 如果它是纯粹的临时计算结果),释放
if (reg == current->freeReg - 1)
{
current->freeReg--;
}
}
void Emit(Instruction inst)
{
current->proto->code.push_back(inst);
}
std::uint16_t AddConstant(Value v)
{
// TODO: 查重
current->proto->constants.push_back(v);
return static_cast<std::uint16_t>(current->proto->constants.size() - 1);
}
void BeginScope()
{
current->scopeDepth++;
}
void EndScope()
{
current->scopeDepth--;
while (!current->locals.empty() && current->locals.back().depth > current->scopeDepth)
{
FreeReg(current->locals.back().reg);
current->locals.pop_back();
}
}
bool HasLocalInCurrentScope(const String &name)
{
// 逆向查重
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->depth < current->scopeDepth)
break; // 已经超出了当前深度,提前阻断
if (it->name == name)
return true;
}
return false;
}
bool HasLocal(const String &name)
{
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->name == name)
{
if (it->depth == current->scopeDepth)
{
return true; // 同级不管 public直接捕获
}
else if (it->isPublic)
{
return true; // 不同级变量 public才能被捕捉
}
}
}
return false;
}
std::uint8_t ResolveLocal(const String &name)
{
// 变量遮蔽: 永远先使用同级已有的变量, 所以逆向遍历
for (auto it = current->locals.rbegin(); it != current->locals.rend(); ++it)
{
if (it->name == name)
{
if (it->depth < current->scopeDepth && !it->isPublic)
{
assert(false && "ResolveLocal: Attempt to access a private variable from an outer scope!");
}
return it->reg;
}
}
// 如果在本 Frame 没找到,那就是外层函数的变量 (闭包 Upvalue) 或者全局变量 (Global)。
assert(false && "ResolveLocal: Variable not found in current frame (Upvalue/Global not implemented yet)!");
return UINT8_MAX;
}
std::uint8_t DeclareLocal(bool isPublic, const String &name)
{
std::uint8_t reg = AllocReg();
current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth});
return reg;
}
std::uint8_t DeclareLocal(bool isPublic, const String &name, std::uint8_t reg)
{
current->locals.push_back(LocalVar{isPublic, name, reg, current->scopeDepth});
return reg;
}
SourceLocation makeSourceLocation(AstNode *node)
{
SourceLocation location = node->location; // copy
location.functionName = current->name;
location.fileName = fileName;
return location;
}
Result<std::uint8_t, Error> CompileIdentiExpr(IdentiExpr *);
Result<std::uint8_t, Error> CompileLiteral(LiteralExpr *);
Result<std::uint8_t, Error> CompileAssignment(InfixExpr *); // 编译赋值,由 CompileInfixExpr调用
Result<std::uint8_t, Error> CompileInfixExpr(InfixExpr *);
Result<std::uint8_t, Error> CompileLeftValue(Expr *); // 左值对象,可以是变量、结构体字段或模块对象
Result<std::uint8_t, Error> CompileExpr(Expr *);
Result<void, Error> CompileVarDecl(VarDecl *);
Result<void, Error> CompileStmt(Stmt *);
};
/// Prints one decoded instruction to std::cout as "<index> <opcode> <operands>\n".
///
/// Bit layout read here: opcode in bits 0-7, operand A in bits 8-15, followed
/// by either B (bits 16-23) and C (bits 24-31) or one 16-bit Bx (bits 16-31).
/// Opcodes without a case below print "?" in place of their operands.
inline void DisassembleInstruction(Instruction inst, std::size_t index)
{
    const auto opcode = static_cast<OpCode>(inst & 0xFF);
    const std::string_view mnemonic = magic_enum::enum_name(opcode);

    // Every instruction carries at least operand A.
    const auto regA = static_cast<std::uint8_t>((inst >> 8) & 0xFF);
    // Bx is only decoded by the opcodes that use the iABx layout.
    const auto operandBx = [inst]() { return static_cast<std::uint16_t>((inst >> 16) & 0xFFFF); };

    // Zero-padded address, mnemonic left-aligned in 10 columns.
    std::cout << std::format("{:04d} {:<10} ", index, mnemonic);

    std::string operands;
    switch (opcode)
    {
        case OpCode::Mov:
            // iABx layout
            operands = std::format("R{:<3} R[{}]", regA, operandBx());
            break;

        case OpCode::LoadK:
            // iABx layout, Bx indexes the constant pool
            operands = std::format("R{:<3} K[{}]", regA, operandBx());
            break;

        case OpCode::Add:
        case OpCode::Sub:
        case OpCode::Mul:
        case OpCode::Div:
        case OpCode::Mod: {
            // iABC layout
            const auto regB = static_cast<std::uint8_t>((inst >> 16) & 0xFF);
            const auto regC = static_cast<std::uint8_t>((inst >> 24) & 0xFF);
            operands = std::format("R{:<3} R{:<3} R{}", regA, regB, regC);
            break;
        }

        case OpCode::Return:
            // iA layout: only A is used
            operands = std::format("R{}", regA);
            break;

        default:
            operands = "?";
            break;
    }

    std::cout << operands;
    std::cout << '\n';
}
/// Prints a "=== Bytecode ===" banner followed by the disassembly of every
/// instruction in `code`, one per line, numbered from 0.
inline void DumpCode(const DynArray<Instruction> &code)
{
    std::cout << "=== Bytecode ===\n";

    std::size_t offset = 0;
    for (const auto &inst : code)
    {
        DisassembleInstruction(inst, offset);
        ++offset;
    }
}
}; // namespace Fig