重构类型系统并改进诊断功能

- 更新了类型系统,新增了类型并优化了结构。
- 引入了基类型和派生类,用于函数、结构体和接口类型。
- 实现了类型上下文,用于管理内置类型和类型解析。
- 添加了诊断类,用于收集和报告警告和错误。
- 通过改进错误处理增强了虚拟机执行,以应对递归限制问题。
- 实现了反汇编器,将字节码转换为可读的文本形式,以改善调试和分析。
- 添加了新的抽象语法树节点,用于成员表达式、对象初始化、接口和结构体定义。
- 引入了语义错误测试,包括重定义、未声明的变量和无效的结构字段。
This commit is contained in:
2026-03-10 12:33:17 +08:00
parent 90448006ff
commit 0f635ccf2b
47 changed files with 2365 additions and 2541 deletions

View File

@@ -2,7 +2,7 @@
@file src/Parser/Parser.hpp
@brief 语法分析器(Pratt + 手动递归下降) 定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
@date 2026-03-08
*/
#pragma once
@@ -16,12 +16,10 @@
#include <cstddef>
#include <cstdlib>
#include <unordered_set>
namespace Fig
{
class Parser
{
private:
@@ -29,60 +27,56 @@ namespace Fig
Lexer &lexer;
SourceManager &srcManager;
size_t index = 0; // tokenbuffer下标
DynArray<Token> buffer;
size_t index = 0; // 当前 Tokenbuffer 中的下标
DynArray<Token> buffer; // 已从 Lexer 读取的 Token 缓存
String fileName;
bool isEOF = false;
bool isEOF = false;
// Lazily fetch the next Token, skipping comments
/// Advances the cursor by one token and returns the token now under it.
///
/// Tokens that were already buffered (for example by a lookahead) are replayed
/// from `buffer`; otherwise the lexer is polled until it yields a token that is
/// not a comment. Once the end-of-file token has been buffered the cursor stays
/// parked on the last reachable token and further calls keep returning it.
///
/// A lexer failure is reported through ReportError and terminates the process
/// with std::exit(-1).
Token nextToken()
{
    // A later token is already buffered: just step onto it.
    if (index + 1 < buffer.size())
    {
        return buffer[++index];
    }

    // Nothing more can be read from the lexer; stay where we are.
    if (isEOF)
    {
        return buffer[index];
    }

    while (true)
    {
        auto result = lexer.NextToken();
        if (!result)
        {
            ReportError(result.error(), srcManager);
            std::exit(-1);
        }

        const Token &token = result.value();
        if (token.type == TokenType::Comments)
        {
            continue; // comments are dropped and never reach the buffer
        }
        if (token.type == TokenType::EndOfFile)
        {
            isEOF = true;
        }

        buffer.push_back(token);
        index = buffer.size() - 1;
        return buffer[index];
    }
}
/// Returns the token immediately before the cursor.
///
/// The lookup is relative to `index`, not to the end of `buffer`, so tokens
/// that were buffered ahead of the cursor do not shift the result. When the
/// cursor is on the first token (or nothing has been buffered yet) the current
/// token is returned instead.
inline Token prevToken()
{
    if (buffer.empty())
    {
        return currentToken();
    }
    return (index > 0) ? buffer[index - 1] : buffer[0];
}
/// Returns the token under the cursor.
///
/// If nothing has been buffered yet, the first token is fetched via
/// nextToken(). Otherwise the token at `index` is returned; the last element
/// of `buffer` is not used because it may be a token buffered ahead of the
/// cursor.
inline Token currentToken()
{
    if (buffer.empty())
    {
        return nextToken();
    }
    return buffer[index];
}
// 惰性窥视后续 Token
Token peekToken(size_t lookahead = 1)
{
assert(!isEOF && "peekToken: eof but called peekToken");
size_t peekIndex = index + lookahead;
while (peekIndex >= buffer.size() && !isEOF)
size_t targetIndex = index + lookahead;
while (targetIndex >= buffer.size() && !isEOF)
{
auto result = lexer.NextToken();
if (!result)
@@ -90,29 +84,22 @@ namespace Fig
ReportError(result.error(), srcManager);
std::abort();
}
const Token &token = result.value();
if (token.type == TokenType::EndOfFile)
{
if (result->type == TokenType::Comments)
continue;
if (result->type == TokenType::EndOfFile)
isEOF = true;
}
buffer.push_back(token);
buffer.push_back(*result);
}
if (peekIndex >= buffer.size()) // 没有那么多token
{
return buffer.back(); // back是EOF Token
}
return buffer[peekIndex];
return (targetIndex >= buffer.size()) ? buffer.back() : buffer[targetIndex];
}
/// Returns the token under the cursor and then advances past it.
///
/// The cursor is advanced exactly once, and only when the consumed token is
/// not the end-of-file token, so repeated calls at the end keep returning EOF.
/// The decision is based on the consumed token itself rather than on `isEOF`,
/// because `isEOF` may already be set while earlier tokens are still
/// unconsumed in `buffer`.
inline Token consumeToken()
{
    Token current = currentToken();
    if (current.type != TokenType::EndOfFile)
    {
        nextToken();
    }
    return current;
}
inline bool match(TokenType type)
{
if (currentToken().type == type)
@@ -123,47 +110,18 @@ namespace Fig
return false;
}
/// Builds a SyntaxError describing a token that does not match what a
/// statement required.
///
/// @param stmtType  name of the construct being parsed (inserted into the message)
/// @param expect    text of what was expected
/// @param tokenGot  offending token; its type name and source position are reported
/// @param loc       C++ call site, forwarded to the Error
inline Error makeUnexpectTokenError(const String &stmtType,
                                    const String &expect,
                                    const Token &tokenGot,
                                    std::source_location loc = std::source_location::current())
{
    const auto gotName = magic_enum::enum_name(tokenGot.type);
    auto message = std::format("expect '{}' in {}, got `{}`", expect, stmtType, gotName);
    return Error(ErrorType::SyntaxError,
                 std::move(message),
                 "none",
                 makeSourceLocation(tokenGot),
                 loc);
}
/// Builds the SyntaxError for a statement that is not followed by ';'.
/// The error is located at the token currently under the cursor.
///
/// @param loc  C++ call site, forwarded to the Error
inline Error makeExpectSemicolonError(
    std::source_location loc = std::source_location::current())
{
    const Token here = currentToken();
    return Error(ErrorType::SyntaxError,
                 "expect ';' after statement",
                 "insert ';'",
                 makeSourceLocation(here),
                 loc);
}
public:
struct State
{
enum StateType : std::uint8_t
{
Standby,
ParsingLiteralExpr,
ParsingIdentiExpr,
ParsingInfixExpr,
ParsingPrefixExpr,
ParsingIndexExpr,
ParsingCallExpr,
ParsingVarDecl,
ParsingIf,
ParsingWhile,
@@ -171,9 +129,7 @@ namespace Fig
ParsingReturn,
ParsingBreak,
ParsingContinue,
ParsingNamedTypeExpr,
} type = StateType::Standby;
std::unordered_set<TokenType> stopAt = {};
};
@@ -190,134 +146,121 @@ namespace Fig
return baseTerminators;
}
std::unordered_set<TokenType> &getTerminators() // 返回固定的终止符
std::unordered_set<TokenType> &getTerminators()
{
/*
Syntax terminators:
; ) ] } , EOF
*/
static std::unordered_set<TokenType> terminators(getBaseTerminators());
return terminators;
}
/// Restores the terminator set to the contents of getBaseTerminators().
void resetTermintors()
{
    auto &active = getTerminators();
    active = getBaseTerminators();
}
bool shouldTerminate() // 判断是否终结
{
const Token &token = currentToken();
const auto &terminators = getTerminators();
if (terminators.contains(token.type))
{
bool shouldTerminate()
{
const Token &token = currentToken();
if (getTerminators().contains(token.type))
return true;
}
for (auto it = stateStack.rbegin(); it < stateStack.rend(); ++it)
{
if (it->stopAt.contains(token.type))
{
return true;
}
}
return false;
}
DynArray<State> stateStack;
/// Returns a mutable reference to the state on top of the state stack.
/// The stack must not be empty when this is called.
State &currentState()
{
    return stateStack.back();
}
/// Pushes a new state on top of the state stack.
///
/// @param _state  state to push; taken by value and moved into the stack
void pushState(State _state)
{
    auto &stack = stateStack;
    stack.push_back(std::move(_state));
}
/// Removes the state on top of the state stack; does nothing if the stack is
/// already empty.
void popState()
{
    if (stateStack.empty())
        return;
    stateStack.pop_back();
}
/// Scope guard for the parser state stack: pushes a state on construction and
/// pops it again on destruction, so the push/pop pair stays balanced on every
/// exit path of the enclosing scope.
struct StateProtector
{
    Parser *p; ///< parser whose state stack is being guarded

    /// @param _p  parser to push the state onto
    /// @param _s  state to push; taken by value and moved into the stack
    StateProtector(Parser *_p, State _s) : p(_p)
    {
        p->pushState(std::move(_s));
    }

    ~StateProtector()
    {
        p->popState();
    }

    // Copying is disabled: a copy would pop the same state a second time.
    StateProtector(const StateProtector &) = delete;
    StateProtector &operator=(const StateProtector &) = delete;
};
public:
/// Creates a parser bound to a lexer and a source manager.
///
/// @param _lexer       token source; held by reference
/// @param _srcManager  source manager used for error reporting and
///                     line/column lookup; held by reference
/// @param _fileName    file name recorded in source locations
///
/// A default State is pushed so that the state stack starts with one entry.
Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName)
    : lexer(_lexer),
      srcManager(_srcManager),
      fileName(std::move(_fileName))
{
    State rootState{};
    pushState(std::move(rootState));
}
private:
/// Builds the SourceLocation used in diagnostics for a given token.
///
/// Line and column come from the source manager via the token's index. The
/// location also carries the file name, the fixed tag "[internal parser]" and
/// the name of the current parser state.
///
/// @param tok  token whose position is reported
SourceLocation makeSourceLocation(const Token &tok)
{
    constexpr int kMaxPlausibleColumn = 5000;

    auto [line, column] = srcManager.GetLineColumn(tok.index);

    // Safety guard: a mis-synchronised parse can produce an absurd column
    // number; reset it so the terminal output cannot run out of memory.
    if (column > kMaxPlausibleColumn)
    {
        column = 1;
    }

    const char *stateName = magic_enum::enum_name(currentState().type).data();
    return SourceLocation(SourcePosition(line, column, tok.length),
                          fileName,
                          "[internal parser]",
                          stateName);
}
/* TypeExpressions */
/// Builds a SyntaxError describing a token that does not match what a
/// statement required.
///
/// @param stmt  name of the construct being parsed (inserted into the message)
/// @param exp   text of what was expected
/// @param got   offending token; its type name and source position are reported
inline Error makeUnexpectTokenError(const String &stmt, const String &exp, const Token &got)
{
    const auto gotName = magic_enum::enum_name(got.type);
    auto message = std::format("expect '{}' in {}, got `{}`", exp, stmt, gotName);
    return Error(ErrorType::SyntaxError,
                 std::move(message),
                 "none",
                 makeSourceLocation(got));
}
Result<NamedTypeExpr *, Error> parseNamedTypeExpr(); // 当前token为identifier
/// Builds the SyntaxError for a statement that is not followed by ';'.
/// The error is located at the token currently under the cursor.
inline Error makeExpectSemicolonError()
{
    const Token here = currentToken();
    return Error(ErrorType::SyntaxError,
                 "expect ';' after statement",
                 "insert ';'",
                 makeSourceLocation(here));
}
Result<TypeExpr *, Error> parseTypeExpr();
/* Expressions */
Result<LiteralExpr *, Error> parseLiteralExpr(); // 当前token为literal时调用
Result<IdentiExpr *, Error> parseIdentiExpr(); // 当前token为Identifier调用
Result<InfixExpr *, Error> parseInfixExpr(
Expr *); // 由 parseExpression递归调用, 当前token为op
Result<PrefixExpr *, Error> parsePrefixExpr(); // 由 parseExpression递归调用, 当前token为op
Result<IndexExpr *, Error> parseIndexExpr(
Expr *); // 由 parseExpression调用, 当前token为 `[`
Result<CallExpr *, Error> parseCallExpr(Expr *); // 由 parseExpression调用, 当前token为 `(`
Result<TypeExpr *, Error> parseNamedTypeExpr();
Result<Expr *, Error> parseExpression(BindingPower = 0);
Result<Expr *, Error> parseLiteralExpr();
Result<Expr *, Error> parseIdentiExpr();
Result<Expr *, Error> parsePrefixExpr();
Result<Expr *, Error> parseInfixExpr(Expr *);
Result<Expr *, Error> parseIndexExpr(Expr *);
Result<Expr *, Error> parseCallExpr(Expr *);
Result<Expr *, Error> parseNewExpr();
/* Statements */
Result<BlockStmt *, Error> parseBlockStmt(); // 当前token为 {
Result<VarDecl *, Error> parseVarDecl(bool); // 由 parseStatement调用, 当前token为 var
Result<IfStmt *, Error> parseIfStmt(); // 由 parseStatement调用, 当前token为 if
Result<WhileStmt *, Error> parseWhileStmt(); // 由 parseStatement调用, 当前token为 while
Result<BlockStmt *, Error> parseBlockStmt();
Result<VarDecl *, Error> parseVarDecl(bool);
Result<IfStmt *, Error> parseIfStmt();
Result<WhileStmt *, Error> parseWhileStmt();
Result<DynArray<Param *>, Error> parseFnParams();
Result<FnDefStmt *, Error> parseFnDefStmt(bool);
Result<ReturnStmt *, Error> parseReturnStmt();
Result<DynArray<Param *>, Error> parseFnParams(); // 由 parseFnDefStmt或lambda调用
Result<FnDefStmt *, Error> parseFnDefStmt(bool); // 由 parseStatement调用, 当前token为 func
Result<Stmt *, Error> parseStructDef(bool);
Result<Stmt *, Error> parseInterfaceDef(bool);
Result<Stmt *, Error> parseImpl();
Result<ReturnStmt *, Error> parseReturnStmt(); // 由 parseStatement调用, 当前token为 return
// continue break直接由parseStatement一步解析
Result<Stmt *, Error> parseStatement();
Result<Stmt *, Error> parseStatement();
public:
/// Creates a parser bound to a lexer and a source manager.
///
/// @param _lexer  token source; held by reference
/// @param _src    source manager used for error reporting and line/column
///                lookup; held by reference
/// @param _file   file name recorded in source locations
///
/// A default State is pushed so that the state stack starts with one entry.
Parser(Lexer &_lexer, SourceManager &_src, String _file)
    : lexer(_lexer),
      srcManager(_src),
      fileName(std::move(_file))
{
    State rootState{};
    pushState(std::move(rootState));
}
Result<Program *, Error> Parse();
};
// Replaces the `stopAt` set of the state on top of the state stack with the
// listed TokenType values. Expands to an assignment on currentState(), so it
// is only usable where currentState() is callable.
// Note: the expansion already ends with ';'.
#define SET_STOP_AT(...) currentState().stopAt = {__VA_ARGS__};
}; // namespace Fig
} // namespace Fig